relaton-iso 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/Gemfile.lock +2 -2
- data/appveyor.yml +1 -0
- data/lib/relaton_iso/iso_bibliography.rb +25 -14
- data/lib/relaton_iso/scrapper.rb +20 -20
- data/lib/relaton_iso/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 251529f7af2fbfc0a28760e6f7100a3245091229
|
4
|
+
data.tar.gz: d9be71b4df57130f0437b9112072741c37cd9b08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4fc36c5c32d790c39f7b34034960fb73da5329f1ef2f63af2a0417dd8663e915a622a9981abc10cdf334f8dbda5db12708cd3f162428ba8a9283984de3b2793
|
7
|
+
data.tar.gz: eaeadce774f686eaedfcc096071e4fe03b25b72f8c76b702ad302eb21e659ddf5d26d49411064aa5c5c75a3fe1e44dbc86bcc3ec049c116c1ee6fa0b37304acf
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-iso (0.5.1)
|
4
|
+
relaton-iso (0.5.2)
|
5
5
|
algoliasearch
|
6
6
|
relaton-iec (~> 0.3.0)
|
7
7
|
relaton-iso-bib (~> 0.2.0)
|
@@ -44,7 +44,7 @@ GEM
|
|
44
44
|
relaton-bib (0.2.3)
|
45
45
|
addressable
|
46
46
|
nokogiri (~> 1.8.4)
|
47
|
-
relaton-iec (0.3.
|
47
|
+
relaton-iec (0.3.1)
|
48
48
|
addressable
|
49
49
|
relaton-iso-bib (~> 0.2.0)
|
50
50
|
relaton-iso-bib (0.2.3)
|
data/appveyor.yml
CHANGED
data/lib/relaton_iso/iso_bibliography.rb
CHANGED
@@ -31,8 +31,15 @@ module RelatonIso
|
|
31
31
|
# :keep_year if undated reference should return actual reference with year
|
32
32
|
# @return [String] Relaton XML serialisation of reference
|
33
33
|
def get(code, year, opts)
|
34
|
+
%r{
|
35
|
+
^(?<code1>[^\s]+\s[^/]+) # match code
|
36
|
+
/?
|
37
|
+
(?<corr>(Amd|CD Amd|Cor|CD Cor)\s\d+:?(\d{4})?(/Cor \d+:\d{4})?) # match correction
|
38
|
+
}x =~ code
|
39
|
+
code = code1 if code1
|
40
|
+
|
34
41
|
if year.nil?
|
35
|
-
/^(?<code1>[
|
42
|
+
/^(?<code1>[^\s]+\s[\d-]+):?(?<year1>\d{4})?/ =~ code
|
36
43
|
unless code1.nil?
|
37
44
|
code = code1
|
38
45
|
year = year1
|
@@ -41,11 +48,11 @@ module RelatonIso
|
|
41
48
|
code += "-1" if opts[:all_parts]
|
42
49
|
return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR].match code
|
43
50
|
|
44
|
-
ret = isobib_get1(code, year,
|
51
|
+
ret = isobib_get1(code, year, corr)
|
45
52
|
if ret.nil? && code =~ %r[^ISO\s]
|
46
53
|
c = code.gsub "ISO", "ISO/IEC"
|
47
54
|
warn "Attempting ISO/IEC retrieval"
|
48
|
-
ret = isobib_get1(c, year,
|
55
|
+
ret = isobib_get1(c, year, corr)
|
49
56
|
end
|
50
57
|
return nil if ret.nil?
|
51
58
|
|
@@ -81,20 +88,24 @@ module RelatonIso
|
|
81
88
|
workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
|
82
89
|
end
|
83
90
|
|
84
|
-
def isobib_search_filter(code)
|
85
|
-
docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
|
86
|
-
corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
|
91
|
+
def isobib_search_filter(code, corr)
|
92
|
+
# docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
|
93
|
+
# corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
|
87
94
|
warn "fetching #{code}..."
|
88
95
|
result = search(code)
|
89
|
-
result.
|
90
|
-
ret
|
96
|
+
result.reduce([]) do |ret, page|
|
97
|
+
ret += page.select do |i|
|
91
98
|
i.hit["title"] &&
|
92
|
-
i.hit["title"]
|
93
|
-
|
99
|
+
i.hit["title"] =~ %r{^#{code}} && (
|
100
|
+
corr && %r{^#{code}[d-]*(:\d{4})?/#{corr}} =~ i.hit["title"] ||
|
101
|
+
%r{^#{code}[\d-]*(:\d{4})?/} !~ i.hit["title"] && !corr
|
102
|
+
)
|
94
103
|
end
|
95
|
-
return ret
|
104
|
+
return ret if ret.size > 9
|
105
|
+
|
106
|
+
ret
|
96
107
|
end
|
97
|
-
[]
|
108
|
+
# []
|
98
109
|
end
|
99
110
|
|
100
111
|
# Sort through the results from RelatonIso, fetching them three at a time,
|
@@ -120,9 +131,9 @@ module RelatonIso
|
|
120
131
|
{ years: missed_years }
|
121
132
|
end
|
122
133
|
|
123
|
-
def isobib_get1(code, year,
|
134
|
+
def isobib_get1(code, year, corr)
|
124
135
|
# return iev(code) if /^IEC 60050-/.match code
|
125
|
-
result = isobib_search_filter(code) || return
|
136
|
+
result = isobib_search_filter(code, corr) || return
|
126
137
|
ret = isobib_results_filter(result, year)
|
127
138
|
return ret[:ret] if ret[:ret]
|
128
139
|
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -70,7 +70,7 @@ module RelatonIso
|
|
70
70
|
type: fetch_type(hit_data["title"]),
|
71
71
|
docstatus: fetch_status(doc, hit_data["status"]),
|
72
72
|
ics: fetch_ics(doc),
|
73
|
-
dates: fetch_dates(doc),
|
73
|
+
dates: fetch_dates(doc, hit_data["title"]),
|
74
74
|
contributors: fetch_contributors(hit_data["title"]),
|
75
75
|
editorialgroup: fetch_workgroup(doc),
|
76
76
|
abstract: abstract,
|
@@ -341,19 +341,33 @@ module RelatonIso
|
|
341
341
|
end
|
342
342
|
end
|
343
343
|
|
344
|
+
# rubocop:disable Metrics/MethodLength
|
344
345
|
# Fetch dates
|
345
346
|
# @param doc [Nokogiri::HTML::Document]
|
346
347
|
# @return [Array<Hash>]
|
347
|
-
def fetch_dates(doc)
|
348
|
+
def fetch_dates(doc, title)
|
348
349
|
dates = []
|
349
|
-
|
350
|
-
|
351
|
-
|
350
|
+
%r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ title
|
351
|
+
pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
|
352
|
+
if ref_date_str
|
353
|
+
ref_date = Date.strptime ref_date_str, "%Y"
|
354
|
+
if pub_date_str.empty?
|
355
|
+
dates << { type: "published", on: ref_date_str }
|
356
|
+
else
|
357
|
+
pub_date = Date.strptime pub_date_str, "%Y"
|
358
|
+
if pub_date.year > ref_date.year
|
359
|
+
dates << { type: "published", on: ref_date_str }
|
360
|
+
dates << { type: "updated", on: pub_date_str }
|
361
|
+
else
|
362
|
+
dates << { type: "published", on: pub_date_str }
|
363
|
+
end
|
364
|
+
end
|
365
|
+
elsif !pub_date_str.empty?
|
366
|
+
dates << { type: "published", on: pub_date_str }
|
352
367
|
end
|
353
368
|
dates
|
354
369
|
end
|
355
370
|
|
356
|
-
# rubocop:disable Metrics/MethodLength
|
357
371
|
def fetch_contributors(title)
|
358
372
|
title.sub(/\s.*/, "").split("/").map do |abbrev|
|
359
373
|
case abbrev
|
@@ -408,20 +422,6 @@ module RelatonIso
|
|
408
422
|
{ owner: { name: owner_name }, from: from }
|
409
423
|
end
|
410
424
|
end
|
411
|
-
|
412
|
-
# private
|
413
|
-
#
|
414
|
-
# def next_hits_page(next_page)
|
415
|
-
# page = @index.search @text, facetFilters: ['category:standard'],
|
416
|
-
# page: next_page
|
417
|
-
# page.each do |key, value|
|
418
|
-
# if key == 'hits'
|
419
|
-
# @docs[key] += value
|
420
|
-
# else
|
421
|
-
# @docs[key] = value
|
422
|
-
# end
|
423
|
-
# end
|
424
|
-
# end
|
425
425
|
end
|
426
426
|
# rubocop:enable Metrics/ModuleLength
|
427
427
|
end
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|