isobib 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +7 -4
- data/README.adoc +4 -0
- data/isobib.gemspec +1 -0
- data/lib/isobib/iso_bibliography.rb +124 -0
- data/lib/isobib/scrapper.rb +1 -1
- data/lib/isobib/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1b8d223cafdb296a3332ca76dabcaeeffbe8a4d5658b4cd821e2b6cbee6a8aa
|
4
|
+
data.tar.gz: 443f128cee77c73bd940d55c68050f47863199419e90425b44c0fe5fc16034bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b47023fc174c5ad11a8499aa98b17b9eaa60d5ba163afd110f86555625461239972d8b711fc981d2b6953756c4a0b2ff96ed3b95d5fe3d732f55f943ef0165f3
|
7
|
+
data.tar.gz: 5c0cc7ec86313db6274793c14ea5493e2d16b44f9a6b13af14eca204b0f6e45706d1e8cb9874a2bc99470197abb1a28231579832f3c3b3fbef5ed207bbfe9417
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,29 +1,31 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
isobib (0.1.6)
|
4
|
+
isobib (0.1.7)
|
5
5
|
algoliasearch
|
6
6
|
iso-bib-item (~> 0.1.2)
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
-
algoliasearch (1.
|
11
|
+
algoliasearch (1.23.2)
|
12
12
|
httpclient (~> 2.8, >= 2.8.3)
|
13
13
|
json (>= 1.5.1)
|
14
14
|
byebug (10.0.2)
|
15
15
|
coderay (1.1.2)
|
16
16
|
diff-lcs (1.3)
|
17
17
|
docile (1.3.1)
|
18
|
+
duplicate (1.1.1)
|
18
19
|
httpclient (2.8.3)
|
19
|
-
iso-bib-item (0.1.
|
20
|
+
iso-bib-item (0.1.8)
|
21
|
+
duplicate
|
20
22
|
isoics (~> 0.1.6)
|
21
23
|
nokogiri
|
22
24
|
isoics (0.1.6)
|
23
25
|
json (2.1.0)
|
24
26
|
method_source (0.9.0)
|
25
27
|
mini_portile2 (2.3.0)
|
26
|
-
nokogiri (1.8.
|
28
|
+
nokogiri (1.8.3)
|
27
29
|
mini_portile2 (~> 2.3.0)
|
28
30
|
pry (0.11.3)
|
29
31
|
coderay (~> 1.1.0)
|
@@ -56,6 +58,7 @@ PLATFORMS
|
|
56
58
|
|
57
59
|
DEPENDENCIES
|
58
60
|
bundler (~> 1.16)
|
61
|
+
byebug
|
59
62
|
isobib!
|
60
63
|
pry-byebug
|
61
64
|
rake (~> 10.0)
|
data/README.adoc
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
= IsoBib: retrieve ISO Standards for bibliographic use using the BibliographicItem model
|
2
2
|
|
3
|
+
image:https://img.shields.io/gem/v/isobib.svg["Gem Version", link="https://rubygems.org/gems/isobib"]
|
4
|
+
image:https://img.shields.io/travis/riboseinc/isobib/master.svg["Build Status", link="https://travis-ci.org/riboseinc/isobib"]
|
5
|
+
image:https://codeclimate.com/github/riboseinc/isobib/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/riboseinc/isobib"]
|
6
|
+
|
3
7
|
IsoBib is a Ruby gem that implements the https://github.com/riboseinc/isodoc-models#iso-bibliographic-item[IsoBibliographicItem model].
|
4
8
|
|
5
9
|
You can use it to retrieve metadata of ISO Standards from https://www.iso.org, and access such metadata through the `IsoBibliographicItem` object.
|
data/isobib.gemspec
CHANGED
@@ -27,6 +27,7 @@ Gem::Specification.new do |spec|
|
|
27
27
|
|
28
28
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
29
29
|
spec.add_development_dependency 'pry-byebug'
|
30
|
+
spec.add_development_dependency 'byebug'
|
30
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
31
32
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
32
33
|
spec.add_development_dependency 'simplecov'
|
@@ -19,6 +19,130 @@ module Isobib
|
|
19
19
|
def search_and_fetch(text)
|
20
20
|
Scrapper.get(text)
|
21
21
|
end
|
22
|
+
|
23
|
+
# @param code [String] the ISO standard Code to look up (e.g. "ISO 9000")
|
24
|
+
# @param year [String] the year the standard was published (optional)
|
25
|
+
# @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
|
26
|
+
# @return [String] Relaton XML serialisation of reference
|
27
|
+
def isobib_get(code, year, opts)
|
28
|
+
return iev.to_xml if code.casecmp? "IEV"
|
29
|
+
code += "-1" if opts[:all_parts]
|
30
|
+
ret = isobib_get1(code, year, opts)
|
31
|
+
return nil if ret.nil?
|
32
|
+
ret.to_most_recent_reference if !year
|
33
|
+
ret.to_all_parts if opts[:all_parts]
|
34
|
+
ret.to_xml
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def fetch_ref_err(code, year, missed_years)
|
40
|
+
id = year ? "#{code}:#{year}" : code
|
41
|
+
warn "WARNING: no match found on the ISO website for #{id}. "\
|
42
|
+
"The code must be exactly like it is on the website."
|
43
|
+
warn "(There was no match for #{year}, though there were matches "\
|
44
|
+
"found for #{missed_years.join(', ')}.)" unless missed_years.empty?
|
45
|
+
if /\d-\d/.match? code
|
46
|
+
warn "The provided document part may not exist, or the document "\
|
47
|
+
"may no longer be published in parts."
|
48
|
+
else
|
49
|
+
warn "If you wanted to cite all document parts for the reference, "\
|
50
|
+
"use \"#{code} (all parts)\".\nIf the document is not a standard, "\
|
51
|
+
"use its document type abbreviation (TS, TR, PAS, Guide)."
|
52
|
+
end
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
|
56
|
+
def fetch_pages(s, n)
|
57
|
+
workers = WorkersPool.new n
|
58
|
+
workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
|
59
|
+
s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
|
60
|
+
workers.end
|
61
|
+
workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
|
62
|
+
end
|
63
|
+
|
64
|
+
def isobib_search_filter(code)
|
65
|
+
docidrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+}
|
66
|
+
corrigrx = %r{^(ISO|IEC)[^0-9]*\s[0-9-]+:[0-9]+/}
|
67
|
+
warn "fetching #{code}..."
|
68
|
+
result = search(code)
|
69
|
+
result.each do |page|
|
70
|
+
ret = page.select do |i|
|
71
|
+
i.hit["title"] &&
|
72
|
+
i.hit["title"].match(docidrx).to_s == code &&
|
73
|
+
!corrigrx.match?(i.hit["title"])
|
74
|
+
end
|
75
|
+
return ret unless ret.empty?
|
76
|
+
end
|
77
|
+
[]
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
def iev
|
82
|
+
Nokogiri::XML.fragment(<<~"END")
|
83
|
+
<bibitem type="international-standard" id="IEV">
|
84
|
+
<title format="text/plain" language="en" script="Latn">Electropedia:
|
85
|
+
The World's Online Electrotechnical Vocabulary</title>
|
86
|
+
<source type="src">http://www.electropedia.org</source>
|
87
|
+
<docidentifier>IEV</docidentifier>
|
88
|
+
<date type="published"> <on>#{Date.today.year}</on> </date>
|
89
|
+
<contributor>
|
90
|
+
<role type="publisher"/>
|
91
|
+
<organization>
|
92
|
+
<name>International Electrotechnical Commission</name>
|
93
|
+
<abbreviation>IEC</abbreviation>
|
94
|
+
<uri>www.iec.ch</uri>
|
95
|
+
</organization>
|
96
|
+
</contributor>
|
97
|
+
<language>en</language> <language>fr</language>
|
98
|
+
<script>Latn</script>
|
99
|
+
<copyright>
|
100
|
+
<from>#{Date.today.year}</from>
|
101
|
+
<owner>
|
102
|
+
<organization>
|
103
|
+
<name>International Electrotechnical Commission</name>
|
104
|
+
<abbreviation>IEC</abbreviation>
|
105
|
+
<uri>www.iec.ch</uri>
|
106
|
+
</organization>
|
107
|
+
</owner>
|
108
|
+
</copyright>
|
109
|
+
<relation type="updates">
|
110
|
+
<bibitem>
|
111
|
+
<formattedref>IEC 60050</formattedref>
|
112
|
+
</bibitem>
|
113
|
+
</relation>
|
114
|
+
</bibitem>
|
115
|
+
END
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
# Sort through the results from Isobib, fetching them three at a time,
|
120
|
+
# and return the first result that matches the code,
|
121
|
+
# matches the year (if provided), and which has a title (amendments do not).
|
122
|
+
# Only expects the first page of results to be populated.
|
123
|
+
# Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
|
124
|
+
# If no match, returns any years which caused mismatch, for error reporting
|
125
|
+
def isobib_results_filter(result, year)
|
126
|
+
missed_years = []
|
127
|
+
result.each_slice(3) do |s| # ISO website only allows 3 connections
|
128
|
+
fetch_pages(s, 3).each_with_index do |r, i|
|
129
|
+
return { ret: r } if !year
|
130
|
+
r.dates.select { |d| d.type == "published" }.each do |d|
|
131
|
+
return { ret: r } if year.to_i == d.on.year
|
132
|
+
missed_years << d.on.year
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
{ years: missed_years }
|
137
|
+
end
|
138
|
+
|
139
|
+
def isobib_get1(code, year, opts)
|
140
|
+
return iev if code.casecmp? "IEV"
|
141
|
+
result = isobib_search_filter(code) or return nil
|
142
|
+
ret = isobib_results_filter(result, year)
|
143
|
+
return ret[:ret] if ret[:ret]
|
144
|
+
fetch_ref_err(code, year, ret[:years])
|
145
|
+
end
|
22
146
|
end
|
23
147
|
end
|
24
148
|
end
|
data/lib/isobib/scrapper.rb
CHANGED
@@ -220,7 +220,7 @@ module Isobib
|
|
220
220
|
technical_committee: {
|
221
221
|
name: wg_link.text + doc.css('div.entry-title')[0].text,
|
222
222
|
type: 'technicalCommittee',
|
223
|
-
number: workgroup[1]
|
223
|
+
number: workgroup[1]&.match(/\d+/)&.to_s&.to_i
|
224
224
|
} }
|
225
225
|
end
|
226
226
|
|
data/lib/isobib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isobib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: byebug
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|