relaton-iso 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a73ac19c4da469921da4bad644b50f148d750476852e1a43c85c2cf0ffe52b87
4
- data.tar.gz: b7bf3b134c1dc1d851b136a78d35241572cd9706f4c451791e268932b4a68b70
3
+ metadata.gz: 23cfcc3b3c94cf0988d6087a1afacb4bcc6065249b4f6785879e8242384bf00d
4
+ data.tar.gz: f973dc6ba692b89acd1c6b4f9f75b9261ca7aff09557ed58605a7b1b8e499719
5
5
  SHA512:
6
- metadata.gz: bd884caa8c53e3d4b020335e1adbf2abab31f657069a249f821e4cac21a1d126b69d8d3c281058217b3d3aaa9c5f2f4a6d6b361ca328c8a0872fb5e1cba16565
7
- data.tar.gz: 0c92680ecce087a3437176f5d980b59e5fca2416d9f99ac5cdd094b93106c7286cf2f9fded7544c156b54aecf988dc83917c2cf08ddfb8fa05c6f8049bf578d2
6
+ metadata.gz: 8d88890b35c169076ceb3350eedb036116a4fe30403ae23e6e9300718d5cfd8d6d37f49a6724bdb2c4b459a7782bd429cab660d4a195bcef92ac0dc347021c6f
7
+ data.tar.gz: e68d82ea105bba5dc0ab989a35eb02062adefdb45302c17065f9b47b3bf3f3b6eb9ea4e3caa43177782235013f984ea6fe574cad8e95fe1a535319d63c8b573e
@@ -183,6 +183,36 @@ RelatonIso::IsoBibliography.get('ISO 19115', "2014", {all_parts: true}).title
183
183
  @type="main">]
184
184
  ----
185
185
 
186
+ === Get specific language
187
+
188
+ [source,ruby]
189
+ ----
190
+ item = RelatonIso::IsoBibliography.get 'ISO 19115', nil, {lang: "en"}
191
+ item.to_xml
192
+ => <bibitem id="ISO19115-1-2014" type="standard">
193
+ <fetched>2020-01-22</fetched>
194
+ <title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
195
+ <title type="title-main" format="text/plain" language="en" script="Latn">Metadata</title>
196
+ <title type="main" format="text/plain" language="en" script="Latn">Geographic information – Metadata</title>
197
+ <uri type="src">https://www.iso.org/standard/53798.html</uri>
198
+ ...
199
+ </bibitem>
200
+
201
+ item = RelatonIso::IsoBibliography.get 'ISO 19115', nil, {lang: "fr"}
202
+ item.to_xml
203
+ => <bibitem id="ISO19115-1-2014" type="standard">
204
+ <fetched>2020-01-22</fetched>
205
+ <title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
206
+ <title type="title-main" format="text/plain" language="en" script="Latn">Metadata</title>
207
+ <title type="main" format="text/plain" language="en" script="Latn">Geographic information – Metadata</title>
208
+ <title type="title-intro" format="text/plain" language="fr" script="Latn">Information géographique</title>
209
+ <title type="title-main" format="text/plain" language="fr" script="Latn">Métadonnées</title>
210
+ <title type="main" format="text/plain" language="fr" script="Latn">Information géographique – Métadonnées</title>
211
+ <uri type="src">https://www.iso.org/standard/53798.html</uri>
212
+ ...
213
+ </bibitem>
214
+ ----
215
+
186
216
  == Development
187
217
 
188
218
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -7,9 +7,10 @@ module RelatonIso
7
7
  attr_reader :hit_collection
8
8
 
9
9
  # Parse page.
10
+ # @param lang [String, NilClass]
10
11
  # @return [RelatonIso::IsoBibliographicItem]
11
- def fetch
12
- @fetch ||= Scrapper.parse_page @hit
12
+ def fetch(lang = nil)
13
+ @fetch ||= Scrapper.parse_page @hit, lang
13
14
  end
14
15
 
15
16
  # @param builder [Nokogiri::XML::Builder]
@@ -10,25 +10,23 @@ module RelatonIso
10
10
 
11
11
  def_delegators :@array, :<<, :[], :first, :empty?, :any?, :size
12
12
 
13
- # @return [TrueClass, FalseClass]
14
- # attr_reader :fetched
15
-
16
- # @return [RelatonIso::HitPages]
17
- # attr_reader :hit_pages
18
-
19
- # @return [String]
13
+ # @return [String, NilClass]
20
14
  attr_reader :text
21
15
 
22
- # @param hits [Array<Hash>]
16
+ # @param text [String] reference to search
23
17
  def initialize(text)
24
18
  @array = []
25
19
  @text = text
26
- %r{\s(?<num>\d+)(-(?<part>\d+))?} =~ text
20
+ %r{\s(?<num>\d+)(-(?<part>[\d-]+))?} =~ text
27
21
  http = Net::HTTP.new "www.iso.org", 443
28
22
  http.use_ssl = true
29
23
  search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
30
24
  search << "docNumber=#{num}"
31
25
  search << "docPartNo=#{part}" if part
26
+ # if year
27
+ # search << "stageDateStart=#{Date.new(year.to_i).strftime("%Y-%m-%d")}"
28
+ # search << "stageDateEnd=#{Date.new(year.to_i, 12, 31).strftime("%Y-%m-%d")}"
29
+ # end
32
30
  q = search.join "&"
33
31
  resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
34
32
  "Accept" => "application/json, text/plain, */*")
@@ -55,27 +53,14 @@ module RelatonIso
55
53
  self
56
54
  end
57
55
 
58
- # @return [RelatonIso::HitCollection]
59
- # def fetch
60
- # return self if @fetched
61
-
62
- # workers = RelatonBib::WorkersPool.new 4
63
- # workers.worker(&:fetch)
64
- # @array.each do |hit|
65
- # workers << hit
66
- # end
67
- # workers.end
68
- # workers.result
69
- # @fetched = true
70
- # self
71
- # end
72
-
73
- def to_all_parts
56
+ # @param lang [String, NilClass]
57
+ # @return [RelatonIsoBib::IsoBibliographicItem]
58
+ def to_all_parts(lang = nil)
74
59
  parts = @array.select { |h| !h.hit["docPart"].empty? }
75
60
  hit = parts.min_by { |h| h.hit["docPart"].to_i }
76
- return @array.first.fetch unless hit
61
+ return @array.first.fetch lang unless hit
77
62
 
78
- bibitem = hit.fetch
63
+ bibitem = hit.fetch lang
79
64
  bibitem.to_all_parts
80
65
  parts.reject { |h| h.hit["docRef"] == hit.hit["docRef"] }.each do |hi|
81
66
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
@@ -19,24 +19,19 @@ module RelatonIso
19
19
  raise RelatonBib::RequestError, "Could not access http://www.iso.org"
20
20
  end
21
21
 
22
- # @param text [String]
23
- # @return [Array<RelatonIso::IsoBibliographicItem>]
24
- # def search_and_fetch(text)
25
- # Scrapper.get(text)
26
- # end
27
-
28
22
  # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
29
- # @param year [String] the year the standard was published (optional)
30
- # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required,
31
- # :keep_year if undated reference should return actual reference with year
23
+ # @param year [String, NilClass] the year the standard was published
24
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
25
+ # reference is required, :keep_year if undated reference should
26
+ # return actual reference with year
32
27
  # @return [String] Relaton XML serialisation of reference
33
- def get(ref, year, opts)
28
+ def get(ref, year = nil, opts = {})
34
29
  opts[:ref] = ref
35
30
 
36
31
  %r{
37
32
  ^(?<code1>[^\s]+\s[^/]+) # match code
38
33
  /?
39
- (?<corr>(Amd|DAmd|(CD|WD|AWI|NP)\sAmd|Cor|CD\sCor|FDAmd)\s\d+ # correction name
34
+ (?<corr>(Amd|DAmd|(CD|WD|AWI|NP)\sAmd|Cor|CD\sCor|FDAmd|PRF\sAmd)\s\d+ # correction name
40
35
  :?(\d{4})?(/Cor\s\d+:\d{4})?) # match correction year
41
36
  }x =~ ref
42
37
  code = code1 || ref
@@ -122,7 +117,7 @@ module RelatonIso
122
117
  # @param opts [Hash]
123
118
  def try_stages(result, corr, opts)
124
119
  res = nil
125
- %w[NP WD CD DIS FDIS PRF IS AWI].each do |st| # try stages
120
+ %w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
126
121
  c = yield st
127
122
  res = search_code result, c, corr, opts
128
123
  return res unless res.empty?
@@ -134,8 +129,8 @@ module RelatonIso
134
129
  result.select do |i|
135
130
  (opts[:all_parts] || i.hit["docRef"] =~ %r{^#{code}(?!-)}) && (
136
131
  corr && %r{^#{code}[\w-]*(:\d{4})?/#{corr}} =~ i.hit["docRef"] ||
137
- %r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"] && !corr
138
- )
132
+ !corr && %r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"]
133
+ ) # && %r{^#{code}} =~ i.hit["docRef"]
139
134
  end
140
135
  end
141
136
 
@@ -159,11 +154,15 @@ module RelatonIso
159
154
  end
160
155
  return { years: missed_years } unless hits.any?
161
156
 
162
- return { ret: hits.first.fetch } if !opts[:all_parts] || hits.size == 1
157
+ return { ret: hits.first.fetch(opts[:lang]) } if !opts[:all_parts] || hits.size == 1
163
158
 
164
- { ret: hits.to_all_parts }
159
+ { ret: hits.to_all_parts(opts[:lang]) }
165
160
  end
166
161
 
162
+ # @param code [String]
163
+ # @param year [String, NilClass]
164
+ # @param corr [String, NilClass]
165
+ # @param opts [Hash]
167
166
  def isobib_get1(code, year, corr, opts)
168
167
  # return iev(code) if /^IEC 60050-/.match code
169
168
  result = isobib_search_filter(code, corr, opts) || return
@@ -27,10 +27,11 @@ module RelatonIso
27
27
 
28
28
  class << self
29
29
  # Parse page.
30
- # @param hit [Hash]
30
+ # @param hit_data [Hash]
31
+ # @param lang [String, NilClass]
31
32
  # @return [Hash]
32
33
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
33
- def parse_page(hit_data)
34
+ def parse_page(hit_data, lang = nil)
34
35
  path = "/contents/data/standard#{hit_data["splitPath"]}/#{hit_data["csnumber"]}.html"
35
36
  doc, url = get_page path
36
37
 
@@ -38,15 +39,15 @@ module RelatonIso
38
39
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")&.
39
40
  children&.last&.text&.match(/\d+/)&.to_s
40
41
 
41
- titles, abstract = fetch_titles_abstract(doc)
42
+ titles, abstract, langs = fetch_titles_abstract(doc, lang)
42
43
 
43
44
  RelatonIsoBib::IsoBibliographicItem.new(
44
45
  fetched: Date.today.to_s,
45
46
  docid: fetch_docid(hit_data["docRef"]),
46
47
  docnumber: fetch_docnumber(doc),
47
48
  edition: edition,
48
- language: langs(doc).map { |l| l[:lang] },
49
- script: langs(doc).map { |l| script(l[:lang]) }.uniq,
49
+ language: langs.map { |l| l[:lang] },
50
+ script: langs.map { |l| script(l[:lang]) }.uniq,
50
51
  title: titles,
51
52
  doctype: fetch_type(hit_data["docRef"]),
52
53
  docstatus: fetch_status(doc),
@@ -68,40 +69,46 @@ module RelatonIso
68
69
 
69
70
  # Fetch titles and abstracts.
70
71
  # @param doc [Nokogiri::HTML::Document]
72
+ # @param lang [String, NilClass]
71
73
  # @return [Array<Array>]
72
74
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
73
- def fetch_titles_abstract(doc)
75
+ def fetch_titles_abstract(doc, lang)
74
76
  titles = []
75
77
  abstract = []
76
- langs(doc).each do |lang|
78
+ langs = languages(doc, lang).reduce([]) do |s, l|
77
79
  # Don't need to get page for en. We already have it.
78
- d = lang[:path] ? get_page(lang[:path])[0] : doc
79
- titles << fetch_title(d, lang[:lang])
80
+ d = l[:path] ? get_page(l[:path])[0] : doc
81
+ unless d.at("//h5[@class='help-block'][.='недоступно на русском языке']")
82
+ s << l
83
+ titles << fetch_title(d, l[:lang])
80
84
 
81
- # Fetch abstracts.
82
- abstract_content = d.css("div[itemprop='description'] p").text
83
- next if abstract_content.empty?
84
-
85
- abstract << {
86
- content: abstract_content,
87
- language: lang[:lang],
88
- script: script(lang[:lang]),
89
- format: "text/plain",
90
- }
85
+ # Fetch abstracts.
86
+ abstract_content = d.css("div[itemprop='description'] p").text
87
+ unless abstract_content.empty?
88
+ abstract << {
89
+ content: abstract_content,
90
+ language: l[:lang],
91
+ script: script(l[:lang]),
92
+ format: "text/plain",
93
+ }
94
+ end
95
+ end
96
+ s
91
97
  end
92
- [titles, abstract]
98
+ [titles, abstract, langs]
93
99
  end
94
100
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
95
101
 
96
- # Get langs.
102
+ # Returns available languages.
97
103
  # @param doc [Nokogiri::HTML::Document]
104
+ # @param lang [String, NilClass]
98
105
  # @return [Array<Hash>]
99
- def langs(doc)
106
+ def languages(doc, lang)
100
107
  lgs = [{ lang: "en" }]
101
108
  doc.css("li#lang-switcher ul li a").each do |lang_link|
102
109
  lang_path = lang_link.attr("href")
103
- lang = lang_path.match(%r{^\/(fr)\/})
104
- lgs << { lang: lang[1], path: lang_path } if lang
110
+ l = lang_path.match(%r{^\/(fr)\/})
111
+ lgs << { lang: l[1], path: lang_path } if l && (!lang || l[1] == lang)
105
112
  end
106
113
  lgs
107
114
  end
@@ -200,9 +207,12 @@ module RelatonIso
200
207
  def fetch_relations(doc)
201
208
  doc.css("ul.steps li").reduce([]) do |a, r|
202
209
  r_type = r.css("strong").text
210
+ date = []
203
211
  type = case r_type
204
212
  when "Previously", "Will be replaced by" then "obsoletes"
205
213
  when "Corrigenda/Amendments", "Revised by", "Now confirmed"
214
+ date << { type: "circulated",
215
+ on: doc.xpath('//span[@class="stage-date"]').last.text }
206
216
  "updates"
207
217
  else r_type
208
218
  end
@@ -213,7 +223,7 @@ module RelatonIso
213
223
  content: id.text, format: "text/plain",
214
224
  )
215
225
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
216
- formattedref: fref,
226
+ formattedref: fref, date: date
217
227
  )
218
228
  { type: type, bibitem: bibitem }
219
229
  end
@@ -257,6 +267,7 @@ module RelatonIso
257
267
  def script(lang)
258
268
  case lang
259
269
  when "en", "fr" then "Latn"
270
+ # when "ru" then "Cyrl"
260
271
  end
261
272
  end
262
273
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "0.8.0"
4
+ VERSION = "0.8.1"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-11 00:00:00.000000000 Z
11
+ date: 2020-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug