relaton-iso 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/Gemfile.lock +2 -2
- data/appveyor.yml +1 -0
- data/lib/relaton_iso/iso_bibliography.rb +25 -14
- data/lib/relaton_iso/scrapper.rb +20 -20
- data/lib/relaton_iso/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 251529f7af2fbfc0a28760e6f7100a3245091229
|
4
|
+
data.tar.gz: d9be71b4df57130f0437b9112072741c37cd9b08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4fc36c5c32d790c39f7b34034960fb73da5329f1ef2f63af2a0417dd8663e915a622a9981abc10cdf334f8dbda5db12708cd3f162428ba8a9283984de3b2793
|
7
|
+
data.tar.gz: eaeadce774f686eaedfcc096071e4fe03b25b72f8c76b702ad302eb21e659ddf5d26d49411064aa5c5c75a3fe1e44dbc86bcc3ec049c116c1ee6fa0b37304acf
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
relaton-iso (0.5.1)
|
4
|
+
relaton-iso (0.5.2)
|
5
5
|
algoliasearch
|
6
6
|
relaton-iec (~> 0.3.0)
|
7
7
|
relaton-iso-bib (~> 0.2.0)
|
@@ -44,7 +44,7 @@ GEM
|
|
44
44
|
relaton-bib (0.2.3)
|
45
45
|
addressable
|
46
46
|
nokogiri (~> 1.8.4)
|
47
|
-
relaton-iec (0.3.
|
47
|
+
relaton-iec (0.3.1)
|
48
48
|
addressable
|
49
49
|
relaton-iso-bib (~> 0.2.0)
|
50
50
|
relaton-iso-bib (0.2.3)
|
data/appveyor.yml
CHANGED
data/lib/relaton_iso/iso_bibliography.rb
CHANGED
@@ -31,8 +31,15 @@ module RelatonIso
|
|
31
31
|
# :keep_year if undated reference should return actual reference with year
|
32
32
|
# @return [String] Relaton XML serialisation of reference
|
33
33
|
def get(code, year, opts)
|
34
|
+
%r{
|
35
|
+
^(?<code1>[^\s]+\s[^/]+) # match code
|
36
|
+
/?
|
37
|
+
(?<corr>(Amd|CD Amd|Cor|CD Cor)\s\d+:?(\d{4})?(/Cor \d+:\d{4})?) # match correction
|
38
|
+
}x =~ code
|
39
|
+
code = code1 if code1
|
40
|
+
|
34
41
|
if year.nil?
|
35
|
-
/^(?<code1>[
|
42
|
+
/^(?<code1>[^\s]+\s[\d-]+):?(?<year1>\d{4})?/ =~ code
|
36
43
|
unless code1.nil?
|
37
44
|
code = code1
|
38
45
|
year = year1
|
@@ -41,11 +48,11 @@ module RelatonIso
|
|
41
48
|
code += "-1" if opts[:all_parts]
|
42
49
|
return RelatonIec::IecBibliography.get(code, year, opts) if %r[^ISO/IEC DIR].match code
|
43
50
|
|
44
|
-
ret = isobib_get1(code, year,
|
51
|
+
ret = isobib_get1(code, year, corr)
|
45
52
|
if ret.nil? && code =~ %r[^ISO\s]
|
46
53
|
c = code.gsub "ISO", "ISO/IEC"
|
47
54
|
warn "Attempting ISO/IEC retrieval"
|
48
|
-
ret = isobib_get1(c, year,
|
55
|
+
ret = isobib_get1(c, year, corr)
|
49
56
|
end
|
50
57
|
return nil if ret.nil?
|
51
58
|
|
@@ -81,20 +88,24 @@ module RelatonIso
|
|
81
88
|
workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
|
82
89
|
end
|
83
90
|
|
84
|
-
def isobib_search_filter(code)
|
85
|
-
docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
|
86
|
-
corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
|
91
|
+
def isobib_search_filter(code, corr)
|
92
|
+
# docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
|
93
|
+
# corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
|
87
94
|
warn "fetching #{code}..."
|
88
95
|
result = search(code)
|
89
|
-
result.
|
90
|
-
ret
|
96
|
+
result.reduce([]) do |ret, page|
|
97
|
+
ret += page.select do |i|
|
91
98
|
i.hit["title"] &&
|
92
|
-
i.hit["title"]
|
93
|
-
|
99
|
+
i.hit["title"] =~ %r{^#{code}} && (
|
100
|
+
corr && %r{^#{code}[d-]*(:\d{4})?/#{corr}} =~ i.hit["title"] ||
|
101
|
+
%r{^#{code}[\d-]*(:\d{4})?/} !~ i.hit["title"] && !corr
|
102
|
+
)
|
94
103
|
end
|
95
|
-
return ret
|
104
|
+
return ret if ret.size > 9
|
105
|
+
|
106
|
+
ret
|
96
107
|
end
|
97
|
-
[]
|
108
|
+
# []
|
98
109
|
end
|
99
110
|
|
100
111
|
# Sort through the results from RelatonIso, fetching them three at a time,
|
@@ -120,9 +131,9 @@ module RelatonIso
|
|
120
131
|
{ years: missed_years }
|
121
132
|
end
|
122
133
|
|
123
|
-
def isobib_get1(code, year,
|
134
|
+
def isobib_get1(code, year, corr)
|
124
135
|
# return iev(code) if /^IEC 60050-/.match code
|
125
|
-
result = isobib_search_filter(code) || return
|
136
|
+
result = isobib_search_filter(code, corr) || return
|
126
137
|
ret = isobib_results_filter(result, year)
|
127
138
|
return ret[:ret] if ret[:ret]
|
128
139
|
|
data/lib/relaton_iso/scrapper.rb
CHANGED
@@ -70,7 +70,7 @@ module RelatonIso
|
|
70
70
|
type: fetch_type(hit_data["title"]),
|
71
71
|
docstatus: fetch_status(doc, hit_data["status"]),
|
72
72
|
ics: fetch_ics(doc),
|
73
|
-
dates: fetch_dates(doc),
|
73
|
+
dates: fetch_dates(doc, hit_data["title"]),
|
74
74
|
contributors: fetch_contributors(hit_data["title"]),
|
75
75
|
editorialgroup: fetch_workgroup(doc),
|
76
76
|
abstract: abstract,
|
@@ -341,19 +341,33 @@ module RelatonIso
|
|
341
341
|
end
|
342
342
|
end
|
343
343
|
|
344
|
+
# rubocop:disable Metrics/MethodLength
|
344
345
|
# Fetch dates
|
345
346
|
# @param doc [Nokogiri::HTML::Document]
|
346
347
|
# @return [Array<Hash>]
|
347
|
-
def fetch_dates(doc)
|
348
|
+
def fetch_dates(doc, title)
|
348
349
|
dates = []
|
349
|
-
|
350
|
-
|
351
|
-
|
350
|
+
%r{^[^\s]+\s[\d-]+:(?<ref_date_str>\d{4})} =~ title
|
351
|
+
pub_date_str = doc.xpath("//span[@itemprop='releaseDate']").text
|
352
|
+
if ref_date_str
|
353
|
+
ref_date = Date.strptime ref_date_str, "%Y"
|
354
|
+
if pub_date_str.empty?
|
355
|
+
dates << { type: "published", on: ref_date_str }
|
356
|
+
else
|
357
|
+
pub_date = Date.strptime pub_date_str, "%Y"
|
358
|
+
if pub_date.year > ref_date.year
|
359
|
+
dates << { type: "published", on: ref_date_str }
|
360
|
+
dates << { type: "updated", on: pub_date_str }
|
361
|
+
else
|
362
|
+
dates << { type: "published", on: pub_date_str }
|
363
|
+
end
|
364
|
+
end
|
365
|
+
elsif !pub_date_str.empty?
|
366
|
+
dates << { type: "published", on: pub_date_str }
|
352
367
|
end
|
353
368
|
dates
|
354
369
|
end
|
355
370
|
|
356
|
-
# rubocop:disable Metrics/MethodLength
|
357
371
|
def fetch_contributors(title)
|
358
372
|
title.sub(/\s.*/, "").split("/").map do |abbrev|
|
359
373
|
case abbrev
|
@@ -408,20 +422,6 @@ module RelatonIso
|
|
408
422
|
{ owner: { name: owner_name }, from: from }
|
409
423
|
end
|
410
424
|
end
|
411
|
-
|
412
|
-
# private
|
413
|
-
#
|
414
|
-
# def next_hits_page(next_page)
|
415
|
-
# page = @index.search @text, facetFilters: ['category:standard'],
|
416
|
-
# page: next_page
|
417
|
-
# page.each do |key, value|
|
418
|
-
# if key == 'hits'
|
419
|
-
# @docs[key] += value
|
420
|
-
# else
|
421
|
-
# @docs[key] = value
|
422
|
-
# end
|
423
|
-
# end
|
424
|
-
# end
|
425
425
|
end
|
426
426
|
# rubocop:enable Metrics/ModuleLength
|
427
427
|
end
|
data/lib/relaton_iso/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-iso
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|