relaton-iso 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a73ac19c4da469921da4bad644b50f148d750476852e1a43c85c2cf0ffe52b87
4
- data.tar.gz: b7bf3b134c1dc1d851b136a78d35241572cd9706f4c451791e268932b4a68b70
3
+ metadata.gz: 23cfcc3b3c94cf0988d6087a1afacb4bcc6065249b4f6785879e8242384bf00d
4
+ data.tar.gz: f973dc6ba692b89acd1c6b4f9f75b9261ca7aff09557ed58605a7b1b8e499719
5
5
  SHA512:
6
- metadata.gz: bd884caa8c53e3d4b020335e1adbf2abab31f657069a249f821e4cac21a1d126b69d8d3c281058217b3d3aaa9c5f2f4a6d6b361ca328c8a0872fb5e1cba16565
7
- data.tar.gz: 0c92680ecce087a3437176f5d980b59e5fca2416d9f99ac5cdd094b93106c7286cf2f9fded7544c156b54aecf988dc83917c2cf08ddfb8fa05c6f8049bf578d2
6
+ metadata.gz: 8d88890b35c169076ceb3350eedb036116a4fe30403ae23e6e9300718d5cfd8d6d37f49a6724bdb2c4b459a7782bd429cab660d4a195bcef92ac0dc347021c6f
7
+ data.tar.gz: e68d82ea105bba5dc0ab989a35eb02062adefdb45302c17065f9b47b3bf3f3b6eb9ea4e3caa43177782235013f984ea6fe574cad8e95fe1a535319d63c8b573e
@@ -183,6 +183,36 @@ RelatonIso::IsoBibliography.get('ISO 19115', "2014", {all_parts: true}).title
183
183
  @type="main">]
184
184
  ----
185
185
 
186
+ === Get specific language
187
+
188
+ [source,ruby]
189
+ ----
190
+ item = RelatonIso::IsoBibliography.get 'ISO 19115', nil, {lang: "en"}
191
+ item.to_xml
192
+ => <bibitem id="ISO19115-1-2014" type="standard">
193
+ <fetched>2020-01-22</fetched>
194
+ <title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
195
+ <title type="title-main" format="text/plain" language="en" script="Latn">Metadata</title>
196
+ <title type="main" format="text/plain" language="en" script="Latn">Geographic information – Metadata</title>
197
+ <uri type="src">https://www.iso.org/standard/53798.html</uri>
198
+ ...
199
+ </bibitem>
200
+
201
+ item = RelatonIso::IsoBibliography.get 'ISO 19115', nil, {lang: "fr"}
202
+ item.to_xml
203
+ => <bibitem id="ISO19115-1-2014" type="standard">
204
+ <fetched>2020-01-22</fetched>
205
+ <title type="title-intro" format="text/plain" language="en" script="Latn">Geographic information</title>
206
+ <title type="title-main" format="text/plain" language="en" script="Latn">Metadata</title>
207
+ <title type="main" format="text/plain" language="en" script="Latn">Geographic information – Metadata</title>
208
+ <title type="title-intro" format="text/plain" language="fr" script="Latn">Information géographique</title>
209
+ <title type="title-main" format="text/plain" language="fr" script="Latn">Métadonnées</title>
210
+ <title type="main" format="text/plain" language="fr" script="Latn">Information géographique – Métadonnées</title>
211
+ <uri type="src">https://www.iso.org/standard/53798.html</uri>
212
+ ...
213
+ </bibitem>
214
+ ----
215
+
186
216
  == Development
187
217
 
188
218
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -7,9 +7,10 @@ module RelatonIso
7
7
  attr_reader :hit_collection
8
8
 
9
9
  # Parse page.
10
+ # @param lang [String, NilClass]
10
11
  # @return [RelatonIso::IsoBibliographicItem]
11
- def fetch
12
- @fetch ||= Scrapper.parse_page @hit
12
+ def fetch(lang = nil)
13
+ @fetch ||= Scrapper.parse_page @hit, lang
13
14
  end
14
15
 
15
16
  # @param builder [Nokogiri::XML::Builder]
@@ -10,25 +10,23 @@ module RelatonIso
10
10
 
11
11
  def_delegators :@array, :<<, :[], :first, :empty?, :any?, :size
12
12
 
13
- # @return [TrueClass, FalseClass]
14
- # attr_reader :fetched
15
-
16
- # @return [RelatonIso::HitPages]
17
- # attr_reader :hit_pages
18
-
19
- # @return [String]
13
+ # @return [String, NilClass]
20
14
  attr_reader :text
21
15
 
22
- # @param hits [Array<Hash>]
16
+ # @param text [String] reference to search
23
17
  def initialize(text)
24
18
  @array = []
25
19
  @text = text
26
- %r{\s(?<num>\d+)(-(?<part>\d+))?} =~ text
20
+ %r{\s(?<num>\d+)(-(?<part>[\d-]+))?} =~ text
27
21
  http = Net::HTTP.new "www.iso.org", 443
28
22
  http.use_ssl = true
29
23
  search = ["status=ENT_ACTIVE,ENT_PROGRESS,ENT_INACTIVE,ENT_DELETED"]
30
24
  search << "docNumber=#{num}"
31
25
  search << "docPartNo=#{part}" if part
26
+ # if year
27
+ # search << "stageDateStart=#{Date.new(year.to_i).strftime("%Y-%m-%d")}"
28
+ # search << "stageDateEnd=#{Date.new(year.to_i, 12, 31).strftime("%Y-%m-%d")}"
29
+ # end
32
30
  q = search.join "&"
33
31
  resp = http.get("/cms/render/live/en/sites/isoorg.advancedSearch.do?#{q}",
34
32
  "Accept" => "application/json, text/plain, */*")
@@ -55,27 +53,14 @@ module RelatonIso
55
53
  self
56
54
  end
57
55
 
58
- # @return [RelatonIso::HitCollection]
59
- # def fetch
60
- # return self if @fetched
61
-
62
- # workers = RelatonBib::WorkersPool.new 4
63
- # workers.worker(&:fetch)
64
- # @array.each do |hit|
65
- # workers << hit
66
- # end
67
- # workers.end
68
- # workers.result
69
- # @fetched = true
70
- # self
71
- # end
72
-
73
- def to_all_parts
56
+ # @param lang [String, NilClass]
57
+ # @return [RelatonIsoBib::IsoBibliographicItem]
58
+ def to_all_parts(lang = nil)
74
59
  parts = @array.select { |h| !h.hit["docPart"].empty? }
75
60
  hit = parts.min_by { |h| h.hit["docPart"].to_i }
76
- return @array.first.fetch unless hit
61
+ return @array.first.fetch lang unless hit
77
62
 
78
- bibitem = hit.fetch
63
+ bibitem = hit.fetch lang
79
64
  bibitem.to_all_parts
80
65
  parts.reject { |h| h.hit["docRef"] == hit.hit["docRef"] }.each do |hi|
81
66
  isobib = RelatonIsoBib::IsoBibliographicItem.new(
@@ -19,24 +19,19 @@ module RelatonIso
19
19
  raise RelatonBib::RequestError, "Could not access http://www.iso.org"
20
20
  end
21
21
 
22
- # @param text [String]
23
- # @return [Array<RelatonIso::IsoBibliographicItem>]
24
- # def search_and_fetch(text)
25
- # Scrapper.get(text)
26
- # end
27
-
28
22
  # @param ref [String] the ISO standard Code to look up (e.g. "ISO 9000")
29
- # @param year [String] the year the standard was published (optional)
30
- # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required,
31
- # :keep_year if undated reference should return actual reference with year
23
+ # @param year [String, NilClass] the year the standard was published
24
+ # @param opts [Hash] options; restricted to :all_parts if all-parts
25
+ # reference is required, :keep_year if undated reference should
26
+ # return actual reference with year
32
27
  # @return [String] Relaton XML serialisation of reference
33
- def get(ref, year, opts)
28
+ def get(ref, year = nil, opts = {})
34
29
  opts[:ref] = ref
35
30
 
36
31
  %r{
37
32
  ^(?<code1>[^\s]+\s[^/]+) # match code
38
33
  /?
39
- (?<corr>(Amd|DAmd|(CD|WD|AWI|NP)\sAmd|Cor|CD\sCor|FDAmd)\s\d+ # correction name
34
+ (?<corr>(Amd|DAmd|(CD|WD|AWI|NP)\sAmd|Cor|CD\sCor|FDAmd|PRF\sAmd)\s\d+ # correction name
40
35
  :?(\d{4})?(/Cor\s\d+:\d{4})?) # match correction year
41
36
  }x =~ ref
42
37
  code = code1 || ref
@@ -122,7 +117,7 @@ module RelatonIso
122
117
  # @param opts [Hash]
123
118
  def try_stages(result, corr, opts)
124
119
  res = nil
125
- %w[NP WD CD DIS FDIS PRF IS AWI].each do |st| # try stages
120
+ %w[NP WD CD DIS FDIS PRF IS AWI TR].each do |st| # try stages
126
121
  c = yield st
127
122
  res = search_code result, c, corr, opts
128
123
  return res unless res.empty?
@@ -134,8 +129,8 @@ module RelatonIso
134
129
  result.select do |i|
135
130
  (opts[:all_parts] || i.hit["docRef"] =~ %r{^#{code}(?!-)}) && (
136
131
  corr && %r{^#{code}[\w-]*(:\d{4})?/#{corr}} =~ i.hit["docRef"] ||
137
- %r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"] && !corr
138
- )
132
+ !corr && %r{^#{code}[\w-]*(:\d{4})?/} !~ i.hit["docRef"]
133
+ ) # && %r{^#{code}} =~ i.hit["docRef"]
139
134
  end
140
135
  end
141
136
 
@@ -159,11 +154,15 @@ module RelatonIso
159
154
  end
160
155
  return { years: missed_years } unless hits.any?
161
156
 
162
- return { ret: hits.first.fetch } if !opts[:all_parts] || hits.size == 1
157
+ return { ret: hits.first.fetch(opts[:lang]) } if !opts[:all_parts] || hits.size == 1
163
158
 
164
- { ret: hits.to_all_parts }
159
+ { ret: hits.to_all_parts(opts[:lang]) }
165
160
  end
166
161
 
162
+ # @param code [String]
163
+ # @param year [String, NilClass]
164
+ # @param corr [String, NilClass]
165
+ # @param opts [Hash]
167
166
  def isobib_get1(code, year, corr, opts)
168
167
  # return iev(code) if /^IEC 60050-/.match code
169
168
  result = isobib_search_filter(code, corr, opts) || return
@@ -27,10 +27,11 @@ module RelatonIso
27
27
 
28
28
  class << self
29
29
  # Parse page.
30
- # @param hit [Hash]
30
+ # @param hit_data [Hash]
31
+ # @param lang [String, NilClass]
31
32
  # @return [Hash]
32
33
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
33
- def parse_page(hit_data)
34
+ def parse_page(hit_data, lang = nil)
34
35
  path = "/contents/data/standard#{hit_data["splitPath"]}/#{hit_data["csnumber"]}.html"
35
36
  doc, url = get_page path
36
37
 
@@ -38,15 +39,15 @@ module RelatonIso
38
39
  edition = doc&.xpath("//strong[contains(text(), 'Edition')]/..")&.
39
40
  children&.last&.text&.match(/\d+/)&.to_s
40
41
 
41
- titles, abstract = fetch_titles_abstract(doc)
42
+ titles, abstract, langs = fetch_titles_abstract(doc, lang)
42
43
 
43
44
  RelatonIsoBib::IsoBibliographicItem.new(
44
45
  fetched: Date.today.to_s,
45
46
  docid: fetch_docid(hit_data["docRef"]),
46
47
  docnumber: fetch_docnumber(doc),
47
48
  edition: edition,
48
- language: langs(doc).map { |l| l[:lang] },
49
- script: langs(doc).map { |l| script(l[:lang]) }.uniq,
49
+ language: langs.map { |l| l[:lang] },
50
+ script: langs.map { |l| script(l[:lang]) }.uniq,
50
51
  title: titles,
51
52
  doctype: fetch_type(hit_data["docRef"]),
52
53
  docstatus: fetch_status(doc),
@@ -68,40 +69,46 @@ module RelatonIso
68
69
 
69
70
  # Fetch titles and abstracts.
70
71
  # @param doc [Nokogiri::HTML::Document]
72
+ # @param lang [String, NilClass]
71
73
  # @return [Array<Array>]
72
74
  # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
73
- def fetch_titles_abstract(doc)
75
+ def fetch_titles_abstract(doc, lang)
74
76
  titles = []
75
77
  abstract = []
76
- langs(doc).each do |lang|
78
+ langs = languages(doc, lang).reduce([]) do |s, l|
77
79
  # Don't need to get page for en. We already have it.
78
- d = lang[:path] ? get_page(lang[:path])[0] : doc
79
- titles << fetch_title(d, lang[:lang])
80
+ d = l[:path] ? get_page(l[:path])[0] : doc
81
+ unless d.at("//h5[@class='help-block'][.='недоступно на русском языке']")
82
+ s << l
83
+ titles << fetch_title(d, l[:lang])
80
84
 
81
- # Fetch abstracts.
82
- abstract_content = d.css("div[itemprop='description'] p").text
83
- next if abstract_content.empty?
84
-
85
- abstract << {
86
- content: abstract_content,
87
- language: lang[:lang],
88
- script: script(lang[:lang]),
89
- format: "text/plain",
90
- }
85
+ # Fetch abstracts.
86
+ abstract_content = d.css("div[itemprop='description'] p").text
87
+ unless abstract_content.empty?
88
+ abstract << {
89
+ content: abstract_content,
90
+ language: l[:lang],
91
+ script: script(l[:lang]),
92
+ format: "text/plain",
93
+ }
94
+ end
95
+ end
96
+ s
91
97
  end
92
- [titles, abstract]
98
+ [titles, abstract, langs]
93
99
  end
94
100
  # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
95
101
 
96
- # Get langs.
102
+ # Returns available languages.
97
103
  # @param doc [Nokogiri::HTML::Document]
104
+ # @param lang [String, NilClass]
98
105
  # @return [Array<Hash>]
99
- def langs(doc)
106
+ def languages(doc, lang)
100
107
  lgs = [{ lang: "en" }]
101
108
  doc.css("li#lang-switcher ul li a").each do |lang_link|
102
109
  lang_path = lang_link.attr("href")
103
- lang = lang_path.match(%r{^\/(fr)\/})
104
- lgs << { lang: lang[1], path: lang_path } if lang
110
+ l = lang_path.match(%r{^\/(fr)\/})
111
+ lgs << { lang: l[1], path: lang_path } if l && (!lang || l[1] == lang)
105
112
  end
106
113
  lgs
107
114
  end
@@ -200,9 +207,12 @@ module RelatonIso
200
207
  def fetch_relations(doc)
201
208
  doc.css("ul.steps li").reduce([]) do |a, r|
202
209
  r_type = r.css("strong").text
210
+ date = []
203
211
  type = case r_type
204
212
  when "Previously", "Will be replaced by" then "obsoletes"
205
213
  when "Corrigenda/Amendments", "Revised by", "Now confirmed"
214
+ date << { type: "circulated",
215
+ on: doc.xpath('//span[@class="stage-date"]').last.text }
206
216
  "updates"
207
217
  else r_type
208
218
  end
@@ -213,7 +223,7 @@ module RelatonIso
213
223
  content: id.text, format: "text/plain",
214
224
  )
215
225
  bibitem = RelatonIsoBib::IsoBibliographicItem.new(
216
- formattedref: fref,
226
+ formattedref: fref, date: date
217
227
  )
218
228
  { type: type, bibitem: bibitem }
219
229
  end
@@ -257,6 +267,7 @@ module RelatonIso
257
267
  def script(lang)
258
268
  case lang
259
269
  when "en", "fr" then "Latn"
270
+ # when "ru" then "Cyrl"
260
271
  end
261
272
  end
262
273
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RelatonIso
4
- VERSION = "0.8.0"
4
+ VERSION = "0.8.1"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-iso
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-11 00:00:00.000000000 Z
11
+ date: 2020-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug