isobib 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +11 -11
- data/README.adoc +134 -146
- data/isobib.gemspec +0 -4
- data/lib/isobib/scrapper.rb +19 -7
- data/lib/isobib/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abed1f336c905490a2662c336e72ef4eee4c524d
|
4
|
+
data.tar.gz: 978cbaefdeaa2f2f02273c8f0b6b59336b070814
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0bd29b21d85b2942c982ef08ef1c3cbe882cc7c1127d42780ab38bbb2c433774a1bcb60b343d5b9d4e518e03c3012de735f833dcd3106b37bd4901ef4ec3d734
|
7
|
+
data.tar.gz: d298d6a65852cf4cec3ebb94c6998f890922503e8e27822ade25c2963a9412c9b382fe4f2d765f3058bc9db725a29505eb35361f2a6fe51f0ae68bb527959c23
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
isobib (0.1.4)
|
4
|
+
isobib (0.1.5)
|
5
5
|
algoliasearch
|
6
|
-
iso-bib-item
|
6
|
+
iso-bib-item (~> 0.1.2)
|
7
7
|
|
8
8
|
GEM
|
9
9
|
remote: https://rubygems.org/
|
10
10
|
specs:
|
11
|
-
algoliasearch (1.
|
11
|
+
algoliasearch (1.22.0)
|
12
12
|
httpclient (~> 2.8, >= 2.8.3)
|
13
13
|
json (>= 1.5.1)
|
14
|
-
byebug (10.0.
|
14
|
+
byebug (10.0.2)
|
15
15
|
coderay (1.1.2)
|
16
16
|
diff-lcs (1.3)
|
17
|
-
docile (1.1
|
17
|
+
docile (1.3.1)
|
18
18
|
httpclient (2.8.3)
|
19
|
-
iso-bib-item (0.1.
|
20
|
-
isoics
|
19
|
+
iso-bib-item (0.1.5)
|
20
|
+
isoics (~> 0.1.6)
|
21
21
|
nokogiri
|
22
|
-
isoics (0.1.
|
22
|
+
isoics (0.1.6)
|
23
23
|
json (2.1.0)
|
24
24
|
method_source (0.9.0)
|
25
25
|
mini_portile2 (2.3.0)
|
@@ -45,8 +45,8 @@ GEM
|
|
45
45
|
diff-lcs (>= 1.2.0, < 2.0)
|
46
46
|
rspec-support (~> 3.7.0)
|
47
47
|
rspec-support (3.7.1)
|
48
|
-
simplecov (0.
|
49
|
-
docile (~> 1.1
|
48
|
+
simplecov (0.16.1)
|
49
|
+
docile (~> 1.1)
|
50
50
|
json (>= 1.8, < 3)
|
51
51
|
simplecov-html (~> 0.10.0)
|
52
52
|
simplecov-html (0.10.2)
|
@@ -63,4 +63,4 @@ DEPENDENCIES
|
|
63
63
|
simplecov
|
64
64
|
|
65
65
|
BUNDLED WITH
|
66
|
-
1.16.
|
66
|
+
1.16.2
|
data/README.adoc
CHANGED
@@ -4,169 +4,157 @@ IsoBib is a Ruby gem that implements the https://github.com/riboseinc/isodoc-mod
|
|
4
4
|
|
5
5
|
You can use it to retrieve metadata of ISO Standards from https://www.iso.org, and access such metadata through the `IsoBibliographicItem` object.
|
6
6
|
|
7
|
+
== Installation
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
=== Search for a standard using keywords
|
9
|
+
Add this line to your application's Gemfile:
|
12
10
|
|
13
11
|
[source,ruby]
|
14
12
|
----
|
15
|
-
|
16
|
-
=> [
|
17
|
-
<<IsoBibliographicItem: docIdentifier <<projectNumber:19115; partNumber: 1>>; edition: 1 ...>>,
|
18
|
-
<<IsoBibliographicItem: docIdentifier <<projectNumber:19115; partNumber: 1>>; edition: 2 ...>>,
|
19
|
-
...
|
20
|
-
]
|
21
|
-
|
22
|
-
collection = Isobib::IsoBibliography.search("19115")
|
23
|
-
iso191151 = collection.first
|
24
|
-
iso191152 = collection[1]
|
13
|
+
gem 'isobib'
|
25
14
|
----
|
26
15
|
|
16
|
+
And then execute:
|
27
17
|
|
28
|
-
|
18
|
+
$ bundle
|
29
19
|
|
30
|
-
|
31
|
-
----
|
32
|
-
iso191151.title
|
33
|
-
=> [
|
34
|
-
<<IsoLocalizedTitle: titleIntro: "Geographic information"; titleMain: "Metadata"; titlePart: "Part 1: Fundamentals"; language: "en"; script: "latn">>,
|
35
|
-
<<IsoLocalizedTitle: titleIntro: "Information géographique"; titleMain: "Métadonnées"; titlePart: "Partie 1: Principes de base"; language: "fr"; script: "latn">>
|
36
|
-
]
|
37
|
-
|
38
|
-
iso191151.title(lang: "en")
|
39
|
-
=> <<IsoLocalizedTitle: titleIntro: "Geographic information"; titleMain: "Metadata"; titlePart: "Part 1: Fundamentals"; language: "en"; script: "latn">>,
|
20
|
+
Or install it yourself as:
|
40
21
|
|
41
|
-
|
42
|
-
=> "Geographic information -- Metadata -- Part 1: Fundamentals"
|
43
|
-
|
44
|
-
iso191151.abstract(lang: "en").to_s
|
45
|
-
=> "ISO 19115-1:2014 defines the schema required for describing geographic information and services by means of metadata. It provides information about the identification, the extent, the quality, the spatial and temporal aspects, the content, the spatial reference, the portrayal, distribution, and other properties of digital geographic data and services.
|
46
|
-
...
|
47
|
-
"
|
48
|
-
----
|
22
|
+
$ gem install isobib
|
49
23
|
|
50
|
-
|
51
|
-
|
52
|
-
----
|
53
|
-
iso191151.shortref
|
54
|
-
=> "ISO 19115-1:2014"
|
55
|
-
----
|
56
|
-
|
57
|
-
|
58
|
-
=== IsoBibliographicItem URLs
|
59
|
-
|
60
|
-
[source,ruby]
|
61
|
-
----
|
62
|
-
iso191151.url
|
63
|
-
=> "https://www.iso.org/standard/53798.html"
|
64
|
-
|
65
|
-
iso191151.url(:obp)
|
66
|
-
=> "https://www.iso.org/obp/ui/#!iso:std:53798:en"
|
67
|
-
|
68
|
-
iso191151.url(:rss)
|
69
|
-
=> "https://www.iso.org/contents/data/standard/05/37/53798.detail.rss"
|
70
|
-
|
71
|
-
iso191152.url
|
72
|
-
=> "https://www.iso.org/standard/67039.html"
|
73
|
-
----
|
74
|
-
|
75
|
-
|
76
|
-
=== IsoBibliographicItem ICS
|
77
|
-
|
78
|
-
[source,ruby]
|
79
|
-
----
|
80
|
-
iso191151.ics
|
81
|
-
=> <<BibliographicIcs code: ["35", "240", "70"]; title: "IT applications in science">>
|
82
|
-
----
|
83
|
-
|
84
|
-
=== IsoBibliographicItem BibliographicDates
|
85
|
-
|
86
|
-
[source,ruby]
|
87
|
-
----
|
88
|
-
iso191151.dates
|
89
|
-
=> [<<BibliographicDate type: "publish"; date: <<DateTime: 2014-04>> >>]
|
90
|
-
|
91
|
-
iso191151.dates.filter(type: "publish").first.date
|
92
|
-
=> <<DateTime: 2014-04>>
|
93
|
-
# The actual date/time can be found on the `url(:rss)` link
|
94
|
-
# <description>
|
95
|
-
# <![CDATA[This document reached stage 60.60 on 2014-03-19, TC/SC: ISO/TC 211, ICS: 35.240.70]]>
|
96
|
-
# </description>
|
97
|
-
----
|
98
|
-
|
99
|
-
|
100
|
-
=== IsoBibliographicItem DocumentStatus
|
101
|
-
|
102
|
-
----
|
103
|
-
iso191151.status
|
104
|
-
=> <<IsoDocumentStatus stage: 60; substage: 60, status: "Published">>
|
24
|
+
== Usage
|
105
25
|
|
106
|
-
|
107
|
-
iso191152.status
|
108
|
-
=> <<IsoDocumentStatus stage: 60; substage: 00, status: "Under development">>
|
109
|
-
----
|
26
|
+
=== Search for a standard using keywords
|
110
27
|
|
111
28
|
[source,ruby]
|
112
29
|
----
|
113
|
-
|
114
|
-
=>
|
115
|
-
|
116
|
-
|
117
|
-
|
30
|
+
Isobib::IsoBibliography.search("19115")
|
31
|
+
=> [[<Isobib::Hit:0x007fd1d9cc6980 @text="19115" @fullIdentifier="" @matchedWords=["19115"] @category="standard" @title="ISO 19115-1:2014/Amd 1:2018 ">,
|
32
|
+
<Isobib::Hit:0x007fd1d9cc6958 @text="19115" @fullIdentifier="" @matchedWords=["19115"] @category="standard" @title="ISO 19115-1:2014 Geographic information -- Metadata -- Part 1: Fundamentals">,
|
33
|
+
<Isobib::Hit:0x007fd1d9cc6930 @text="19115" @fullIdentifier="" @matchedWords=["19115"] @category="standard" @title="ISO 19115-2:2009 Geographic information -- Metadata -- Part 2: Extensions for imagery and gridded data">,
|
34
|
+
...
|
35
|
+
]]
|
118
36
|
|
119
|
-
|
120
|
-
|
37
|
+
hit_pages = Isobib::IsoBibliography.search("19115")
|
38
|
+
hit_collection = hit_pages.first
|
39
|
+
item = hit_collection[1].fetch
|
121
40
|
----
|
122
41
|
|
123
|
-
|
124
|
-
=== Document relations of a standard
|
42
|
+
=== XML serialization
|
125
43
|
|
126
44
|
[source,ruby]
|
127
45
|
----
|
128
|
-
|
129
|
-
=>
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
46
|
+
item.to_xml
|
47
|
+
=>"<bibitem type='international-standard' id='ISO19115-1'>
|
48
|
+
<title format='text/plain' language='en' script='Latn'>Geographic information -- Metadata -- Part 1: Fundamentals</title>
|
49
|
+
<title format='text/plain' language='fr' script='Latn'>Information géographique -- Métadonnées -- Partie 1: Principes de base</title>
|
50
|
+
<source type='src'>https://www.iso.org/standard/53798.html</source>
|
51
|
+
<source type='obp'>https://www.iso.org/obp/ui/#!iso:std:53798:en</source>
|
52
|
+
<source type='rss'>https://www.iso.org/contents/data/standard/05/37/53798.detail.rss</source>
|
53
|
+
<docidentifier>ISO 19115-1</docidentifier>
|
54
|
+
<date type='published'>
|
55
|
+
<on>2014</on>
|
56
|
+
</date>
|
57
|
+
<contributor>
|
58
|
+
<role type='publisher'/>
|
59
|
+
<organization>
|
60
|
+
<name>International Organization for Standardization</name>
|
61
|
+
<abbreviation>ISO</abbreviation>
|
62
|
+
<uri>www.iso.org</uri>
|
63
|
+
</organization>
|
64
|
+
</contributor>
|
65
|
+
<edition>1</edition>
|
66
|
+
<language>en</language>
|
67
|
+
<language>fr</language>
|
68
|
+
<script>Latn</script>
|
69
|
+
<abstract format='plain' language='en' script='Latn'>
|
70
|
+
ISO 19115-1:2014 defines the schema required for describing geographic
|
71
|
+
information and services by means of metadata. It provides information about
|
72
|
+
the identification, the extent, the quality, the spatial and temporal
|
73
|
+
aspects, the content, the spatial reference, the portrayal, distribution,
|
74
|
+
and other properties of digital geographic data and services.ISO
|
75
|
+
19115-1:2014 is applicable to:-the cataloguing of all types of resources,
|
76
|
+
clearinghouse activities, and the full description of datasets and
|
77
|
+
services;-geographic services, geographic datasets, dataset series, and
|
78
|
+
individual geographic features and feature properties.ISO 19115-1:2014
|
79
|
+
defines:-mandatory and conditional metadata sections, metadata entities, and
|
80
|
+
metadata elements;-the minimum set of metadata required to serve most
|
81
|
+
metadata applications (data discovery, determining data fitness for use,
|
82
|
+
data access, data transfer, and use of digital data and services);-optional
|
83
|
+
metadata elements to allow for a more extensive standard description of
|
84
|
+
resources, if required;-a method for extending metadata to fit specialized
|
85
|
+
needs.Though ISO 19115-1:2014 is applicable to digital data and services,
|
86
|
+
its principles can be extended to many other types of resources such as
|
87
|
+
maps, charts, and textual documents as well as non-geographic data. Certain
|
88
|
+
conditional metadata elements might not apply to these other forms of data.
|
89
|
+
</abstract>
|
90
|
+
<abstract format='plain' language='fr' script='Latn'>
|
91
|
+
L'ISO 19115-1:2014 définit le schéma requis pour décrire des informations
|
92
|
+
géographiques et des services au moyen de métadonnées. Elle fournit des
|
93
|
+
informations concernant l'identification, l'étendue, la qualité, les aspects
|
94
|
+
spatiaux et temporels, le contenu, la référence spatiale, la représentation
|
95
|
+
des données, la distribution et d'autres propriétés des données
|
96
|
+
géographiques numériques et des services.L'ISO 19115-1:2014 est
|
97
|
+
applicable:-au catalogage de tous les types de ressources, des activités des
|
98
|
+
centres d'informations et à la description complète des jeux de données et
|
99
|
+
des services,-aux services géographiques, jeux de données géographiques,
|
100
|
+
séries de jeux de données, entités géographiques individuelles et propriétés
|
101
|
+
d'entités.L'ISO 19115-1:2014 définit:-des sections relatives aux métadonnées
|
102
|
+
obligatoires et facultatives, aux entités de métadonnées et aux éléments de
|
103
|
+
métadonnées,-le jeu minimal de métadonnées requis pour répondre au besoin de
|
104
|
+
la plupart des applications des métadonnées (la découverte des données, la
|
105
|
+
détermination de l'adéquation des données à une utilisation, l'accès aux
|
106
|
+
données, le transfert des données et l'utilisation des données numériques et
|
107
|
+
des services),-les éléments de métadonnées facultatifs pour permettre une
|
108
|
+
description standard plus poussée des ressources, si cela est nécessaire,-un
|
109
|
+
procédé d'extension des métadonnées pour s'adapter aux besoins
|
110
|
+
spéciaux.L'ISO 19115-1:2014 est applicable aux données numériques et
|
111
|
+
services, ses principes peuvent être étendus à bien d'autres types de
|
112
|
+
ressources telles que les cartes, les graphes et les documents textes, de
|
113
|
+
même qu'à des données non géographiques. Certains éléments de métadonnées
|
114
|
+
conditionnels peuvent ne pas s'appliquer à ces autres formes de données.
|
115
|
+
</abstract>
|
116
|
+
<status>Published</status>
|
117
|
+
<copyright>
|
118
|
+
<from>2014</from>
|
119
|
+
<owner>
|
120
|
+
<organization>
|
121
|
+
<name>ISO</name>
|
122
|
+
<abbreviation/>
|
123
|
+
</organization>
|
124
|
+
</owner>
|
125
|
+
</copyright>
|
126
|
+
<relation type='obsoletes'>
|
127
|
+
<bibitem>
|
128
|
+
<formattedref>ISO 19115:2003</formattedref>
|
129
|
+
<docidentifier>ISO 19115:2003</docidentifier>
|
130
|
+
</bibitem>
|
131
|
+
</relation>
|
132
|
+
<relation type='obsoletes'>
|
133
|
+
<bibitem>
|
134
|
+
<formattedref>ISO 19115:2003/Cor 1:2006</formattedref>
|
135
|
+
<docidentifier>ISO 19115:2003/Cor 1:2006</docidentifier>
|
136
|
+
</bibitem>
|
137
|
+
</relation>
|
138
|
+
<relation type='updates'>
|
139
|
+
<bibitem>
|
140
|
+
<formattedref>ISO 19115-1:2014/Amd 1:2018</formattedref>
|
141
|
+
<docidentifier>ISO 19115-1:2014/Amd 1:2018</docidentifier>
|
142
|
+
</bibitem>
|
143
|
+
</relation>
|
144
|
+
<ics>IT applications in science</ics>
|
145
|
+
</bibitem>"
|
146
|
+
----
|
147
|
+
|
148
|
+
== Development
|
149
|
+
|
150
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
151
|
+
|
152
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
153
|
+
|
154
|
+
== Contributing
|
155
|
+
|
156
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/iso-bib-item.
|
157
|
+
|
158
|
+
== License
|
159
|
+
|
160
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/isobib.gemspec
CHANGED
@@ -33,8 +33,4 @@ Gem::Specification.new do |spec|
|
|
33
33
|
|
34
34
|
spec.add_dependency 'algoliasearch'
|
35
35
|
spec.add_dependency 'iso-bib-item', '~> 0.1.2'
|
36
|
-
# spec.add_dependency 'isoics'
|
37
|
-
# spec.add_dependency 'nokogiri'
|
38
|
-
# spec.add_dependency "capybara"
|
39
|
-
# spec.add_dependency "poltergeist"
|
40
36
|
end
|
data/lib/isobib/scrapper.rb
CHANGED
@@ -172,16 +172,16 @@ module Isobib
|
|
172
172
|
def get_page(path)
|
173
173
|
url = DOMAIN + path
|
174
174
|
uri = URI url
|
175
|
-
resp = Net::HTTP.get_response
|
175
|
+
resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
176
176
|
if resp.code == '301'
|
177
177
|
path = resp['location']
|
178
178
|
url = DOMAIN + path
|
179
179
|
uri = URI url
|
180
|
-
resp = Net::HTTP.get_response
|
180
|
+
resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
181
181
|
end
|
182
182
|
n = 0
|
183
183
|
while resp.body !~ /<strong/ && n < 10
|
184
|
-
resp = Net::HTTP.get_response
|
184
|
+
resp = Net::HTTP.get_response(uri)#.encode("UTF-8")
|
185
185
|
n += 1
|
186
186
|
end
|
187
187
|
[Nokogiri::HTML(resp.body), url]
|
@@ -271,8 +271,20 @@ module Isobib
|
|
271
271
|
# @param lang [String]
|
272
272
|
# @return [Hash]
|
273
273
|
def fetch_title(doc, lang)
|
274
|
-
|
274
|
+
titles = doc.css("h3[itemprop='description']")
|
275
275
|
.text.split ' -- '
|
276
|
+
case titles.size
|
277
|
+
when 0
|
278
|
+
intro, main, part = nil, "", nil
|
279
|
+
when 1
|
280
|
+
intro, main, part = nil, titles[0], nil
|
281
|
+
when 2
|
282
|
+
intro, main, part = titles[0], titles[1], nil
|
283
|
+
when 3
|
284
|
+
intro, main, part = titles[0], titles[1], titles[2]
|
285
|
+
else
|
286
|
+
intro, main, part = titles[0], titles[1], titles[2..-1]&.join(" -- ")
|
287
|
+
end
|
276
288
|
{
|
277
289
|
title_intro: intro,
|
278
290
|
title_main: main,
|
@@ -325,7 +337,7 @@ module Isobib
|
|
325
337
|
# @return [Array<Hash>]
|
326
338
|
def fetch_ics(doc)
|
327
339
|
doc.xpath('//strong[contains(text(), '\
|
328
|
-
|
340
|
+
"'ICS')]/../following-sibling::dd/div/a").map do |i|
|
329
341
|
code = i.text.match(/[\d\.]+/).to_s.split '.'
|
330
342
|
{ field: code[0], group: code[1], subgroup: code[2] }
|
331
343
|
end
|
@@ -339,7 +351,7 @@ module Isobib
|
|
339
351
|
obp_elms = doc.xpath("//a[contains(@href, '/obp/ui/')]")
|
340
352
|
obp = obp_elms.attr('href').value if obp_elms.any?
|
341
353
|
rss = DOMAIN + doc.xpath("//a[contains(@href, 'rss')]").attr('href')
|
342
|
-
|
354
|
+
.value
|
343
355
|
[
|
344
356
|
{ type: 'src', content: url },
|
345
357
|
{ type: 'obp', content: obp },
|
@@ -355,7 +367,7 @@ module Isobib
|
|
355
367
|
from = title.match(/(?<=:)\d{4}/).to_s
|
356
368
|
if from.empty?
|
357
369
|
from = doc.xpath("//span[@itemprop='releaseDate']").text
|
358
|
-
|
370
|
+
.match(/\d{4}/).to_s
|
359
371
|
end
|
360
372
|
{ owner: { name: owner_name }, from: from }
|
361
373
|
end
|
data/lib/isobib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isobib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -166,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
166
|
version: '0'
|
167
167
|
requirements: []
|
168
168
|
rubyforge_project:
|
169
|
-
rubygems_version: 2.6.
|
169
|
+
rubygems_version: 2.6.12
|
170
170
|
signing_key:
|
171
171
|
specification_version: 4
|
172
172
|
summary: 'IsoBib: retrieve ISO Standards for bibliographic use using the BibliographicItem
|