relaton-itu 1.14.0 → 1.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -0
- data/.github/workflows/release.yml +3 -2
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -1
- data/Gemfile +9 -0
- data/README.adoc +21 -2
- data/grammars/basicdoc.rng +0 -1
- data/grammars/biblio.rng +12 -2
- data/lib/relaton_itu/data_fetcher.rb +147 -0
- data/lib/relaton_itu/data_parser_r.rb +94 -0
- data/lib/relaton_itu/hit.rb +1 -1
- data/lib/relaton_itu/hit_collection.rb +17 -15
- data/lib/relaton_itu/itu_bibliography.rb +9 -9
- data/lib/relaton_itu/processor.rb +20 -0
- data/lib/relaton_itu/scrapper.rb +2 -2
- data/lib/relaton_itu/version.rb +1 -1
- data/lib/relaton_itu.rb +4 -1
- data/relaton-itu.gemspec +7 -16
- metadata +16 -112
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6766812ad6660d739d3d4e4c1778846119eaca063c3c5cfc34242ffca6414a6b
|
4
|
+
data.tar.gz: 7c3024fc052ddd75a7c82ab1b3231834d6b0c964cf940f5fa1e2c28c0ff5c085
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 76647371a1ef4b6eaf3a4f57bd8bff607dda86f63f0f928e8969d9e9a25532eb456f7781740068f094b1fa4d9b505092ff7710e5fb16b9bdbd168207abc70f23
|
7
|
+
data.tar.gz: d87e9793f3802bde3d39d672c7c10900f5fd1298811bf6bb0fa129f577020c5e3cbae9d52ef5623e6d3e7933c322b2cdc555e824830cfa4707e48169fe92cd73
|
data/.github/workflows/rake.yml
CHANGED
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/Gemfile
CHANGED
@@ -4,3 +4,12 @@ Encoding.default_internal = Encoding::UTF_8
|
|
4
4
|
source "https://rubygems.org"
|
5
5
|
# Specify your gem's dependencies in relaton_itu.gemspec
|
6
6
|
gemspec
|
7
|
+
|
8
|
+
gem "equivalent-xml", "~> 0.6"
|
9
|
+
gem "pry-byebug"
|
10
|
+
gem "rake", "~> 13.0"
|
11
|
+
gem "rspec", "~> 3.0"
|
12
|
+
gem "ruby-jing"
|
13
|
+
gem "simplecov"
|
14
|
+
gem "vcr"
|
15
|
+
gem "webmock"
|
data/README.adoc
CHANGED
@@ -69,7 +69,7 @@ item.to_xml bibdata: true
|
|
69
69
|
</bibdata>"
|
70
70
|
----
|
71
71
|
|
72
|
-
=== Get code
|
72
|
+
=== Get document by code and year
|
73
73
|
[source,ruby]
|
74
74
|
----
|
75
75
|
RelatonItu::ItuBibliography.get("ITU-T L.163", "2018", {})
|
@@ -123,7 +123,7 @@ item.link
|
|
123
123
|
#<RelatonBib::TypedUri:0x00007f82d665f0f8 @content=#<Addressable::URI:0xc184 URI:https://www.itu.inthttp//handle.itu.int/11.1002/1000/13786-en?locatt=format:pdf&auth>, @type="obp">]
|
124
124
|
----
|
125
125
|
|
126
|
-
=== Create bibliographic item from YAML
|
126
|
+
=== Create a bibliographic item from YAML
|
127
127
|
[source,ruby]
|
128
128
|
----
|
129
129
|
hash = YAML.load_file 'spec/examples/itu_bib_item.yml'
|
@@ -135,6 +135,25 @@ RelatonItu::ItuBibliographicItem.from_hash hash
|
|
135
135
|
...
|
136
136
|
----
|
137
137
|
|
138
|
+
=== Fetch data
|
139
|
+
|
140
|
+
This gem uses the (https://extranet.itu.int/brdocsearch) dataset as a data source. +
|
141
|
+
|
142
|
+
The method `RelatonItu::DataFetcher.fetch(output: "data", format: "yaml")` fetches all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
143
|
+
Arguments:
|
144
|
+
|
145
|
+
- `output` - folder to save documents (default 'data').
|
146
|
+
- `format` - the format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
147
|
+
|
148
|
+
[source,ruby]
|
149
|
+
----
|
150
|
+
RelatonItu::DataFetcher.fetch output: "dir", format: "xml"
|
151
|
+
Started at: 2023-05-27 09:21:16 -0400
|
152
|
+
Stopped at: 2023-05-27 09:27:45 -0400
|
153
|
+
Done in: 390 sec.
|
154
|
+
=> nil
|
155
|
+
----
|
156
|
+
|
138
157
|
== Development
|
139
158
|
|
140
159
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/grammars/basicdoc.rng
CHANGED
data/grammars/biblio.rng
CHANGED
@@ -216,6 +216,9 @@
|
|
216
216
|
<optional>
|
217
217
|
<ref name="fullname"/>
|
218
218
|
</optional>
|
219
|
+
<zeroOrMore>
|
220
|
+
<ref name="credential"/>
|
221
|
+
</zeroOrMore>
|
219
222
|
<zeroOrMore>
|
220
223
|
<ref name="affiliation"/>
|
221
224
|
</zeroOrMore>
|
@@ -232,6 +235,11 @@
|
|
232
235
|
<ref name="FullNameType"/>
|
233
236
|
</element>
|
234
237
|
</define>
|
238
|
+
<define name="credential">
|
239
|
+
<element name="credential">
|
240
|
+
<text/>
|
241
|
+
</element>
|
242
|
+
</define>
|
235
243
|
<define name="FullNameType">
|
236
244
|
<choice>
|
237
245
|
<group>
|
@@ -305,7 +313,9 @@
|
|
305
313
|
<zeroOrMore>
|
306
314
|
<ref name="affiliationdescription"/>
|
307
315
|
</zeroOrMore>
|
308
|
-
<
|
316
|
+
<optional>
|
317
|
+
<ref name="organization"/>
|
318
|
+
</optional>
|
309
319
|
</element>
|
310
320
|
</define>
|
311
321
|
<define name="affiliationname">
|
@@ -1316,7 +1326,7 @@
|
|
1316
1326
|
<value>commentaryOf</value>
|
1317
1327
|
<value>hasCommentary</value>
|
1318
1328
|
<value>related</value>
|
1319
|
-
<value>
|
1329
|
+
<value>hasComplement</value>
|
1320
1330
|
<value>complementOf</value>
|
1321
1331
|
<value>obsoletes</value>
|
1322
1332
|
<value>obsoletedBy</value>
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module RelatonItu
|
2
|
+
class DataFetcher
|
3
|
+
def initialize(output, format)
|
4
|
+
@output = output
|
5
|
+
@format = format
|
6
|
+
@ext = format.sub "bibxml", "xml"
|
7
|
+
end
|
8
|
+
|
9
|
+
def files
|
10
|
+
@files ||= []
|
11
|
+
end
|
12
|
+
|
13
|
+
def index
|
14
|
+
@index ||= Relaton::Index.find_or_create :itu, file: "index-v1.yaml"
|
15
|
+
end
|
16
|
+
|
17
|
+
# @return agent [Mechanize]
|
18
|
+
def agent
|
19
|
+
@agent ||= Mechanize.new
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return workers [RelatonBib::WorkersPool]
|
23
|
+
def workers
|
24
|
+
return @workers if @workers
|
25
|
+
|
26
|
+
@workers = RelatonBib::WorkersPool.new 10
|
27
|
+
@workers.tap do |w|
|
28
|
+
w.worker { |row| parse_page(*row) }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# @param url [String]
|
33
|
+
# @param type [String]
|
34
|
+
def parse_page(url, type)
|
35
|
+
doc = agent.get url
|
36
|
+
bib = DataParserR.parse doc, url, type
|
37
|
+
write_file bib
|
38
|
+
rescue => e # rubocop:disable Style/RescueStandardError
|
39
|
+
warn e.message
|
40
|
+
warn e.backtrace
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.fetch(output: "data", format: "yaml")
|
44
|
+
t1 = Time.now
|
45
|
+
puts "Started at: #{t1}"
|
46
|
+
FileUtils.mkdir_p output
|
47
|
+
new(output, format).fetch
|
48
|
+
t2 = Time.now
|
49
|
+
puts "Stopped at: #{t2}"
|
50
|
+
puts "Done in: #{(t2 - t1).round} sec."
|
51
|
+
end
|
52
|
+
|
53
|
+
def fetch
|
54
|
+
fetch_recommendation
|
55
|
+
fetch_question
|
56
|
+
fetch_report
|
57
|
+
fetch_handbook
|
58
|
+
fetch_resolution
|
59
|
+
workers.end
|
60
|
+
workers.result
|
61
|
+
index.save
|
62
|
+
end
|
63
|
+
|
64
|
+
def fetch_recommendation
|
65
|
+
url = "https://extranet.itu.int/brdocsearch/_layouts/15/inplview.aspx?" \
|
66
|
+
"List=%7B0661B581-2413-4E84-BAB2-77E6DB27AF7F%7D&" \
|
67
|
+
"View=%7BC81191DD-48C4-4881-9CB7-FB61C683FE98%7D&" \
|
68
|
+
"ViewCount=123&" \
|
69
|
+
"IsXslView=TRUE&" \
|
70
|
+
"IsCSR=TRUE&" \
|
71
|
+
"ListViewPageUrl=https%3A%2F%2Fextranet.itu.int%2Fbrdocsearch%2FR-REC%2FForms%2Ffolders_inforce.aspx&" \
|
72
|
+
"FolderCTID=0x012001"
|
73
|
+
json_index url, "recommendation"
|
74
|
+
end
|
75
|
+
|
76
|
+
def fetch_question
|
77
|
+
url = "https://extranet.itu.int/brdocsearch/R-QUE/Forms/folders_inforce.aspx"
|
78
|
+
html_index url, "question"
|
79
|
+
end
|
80
|
+
|
81
|
+
def fetch_report
|
82
|
+
url = "https://extranet.itu.int/brdocsearch/_layouts/15/inplview.aspx?" \
|
83
|
+
"List=%7B82E4A13D-C7F3-4844-9E8A-2463C4B7784F%7D&" \
|
84
|
+
"View=%7B94CC1561-E4AC-4317-B402-AA0AADD7F414%7D&" \
|
85
|
+
"ViewCount=407&" \
|
86
|
+
"IsXslView=TRUE&" \
|
87
|
+
"IsCSR=TRUE&" \
|
88
|
+
"ListViewPageUrl=https%3A%2F%2Fextranet.itu.int%2Fbrdocsearch%2FR-REP%2FForms%2FFolders%2520InForce.aspx&" \
|
89
|
+
"FolderCTID=0x012001"
|
90
|
+
json_index url, "technical-report"
|
91
|
+
end
|
92
|
+
|
93
|
+
def fetch_handbook
|
94
|
+
url = "https://extranet.itu.int/brdocsearch/R-HDB/Forms/Folders%20InForce.aspx"
|
95
|
+
html_index url, "handbook"
|
96
|
+
end
|
97
|
+
|
98
|
+
def fetch_resolution
|
99
|
+
url = "https://extranet.itu.int/brdocsearch/R-RES/Forms/Folders%20InForce.aspx"
|
100
|
+
html_index url, "resolution"
|
101
|
+
end
|
102
|
+
|
103
|
+
# @param url [String]
|
104
|
+
# @param type [String]
|
105
|
+
def json_index(url, type) # rubocop:disable Metrics/AbcSize
|
106
|
+
result = agent.post url
|
107
|
+
json = JSON.parse result.body
|
108
|
+
json["Row"].each { |row| workers << [row["serverurl.progid"].sub(/^1/, ""), type] }
|
109
|
+
return unless json["NextHref"]
|
110
|
+
|
111
|
+
nexturl = url.sub(/(Paged|FolderCTID)=.+/, json["NextHref"].match(/(?<=aspx\?).+/).to_s)
|
112
|
+
json_index nexturl, type
|
113
|
+
end
|
114
|
+
|
115
|
+
# @param url [String]
|
116
|
+
# @param type [String]
|
117
|
+
def html_index(url, type)
|
118
|
+
resp = agent.get url
|
119
|
+
result = Nokogiri::HTML resp.body
|
120
|
+
result.xpath("//table//table/tr[position() > 1]").each do |hit|
|
121
|
+
url = hit.at("td/a")[:onclick].match(%r{https://[^']+}).to_s
|
122
|
+
workers << [url, type]
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# @param bib [RelatonItu::ItuBibliographicItem]
|
127
|
+
def write_file(bib) # rubocop:disable Metrics/AbcSize
|
128
|
+
id = bib.docidentifier[0].id.gsub(/[\s.]/, "_")
|
129
|
+
file = "#{@output}/#{id}.#{@ext}"
|
130
|
+
if files.include? file
|
131
|
+
warn "File #{file} exists."
|
132
|
+
else
|
133
|
+
files << file
|
134
|
+
end
|
135
|
+
index.add_or_update bib.docidentifier[0].id, file
|
136
|
+
File.write file, content(bib), encoding: "UTF-8"
|
137
|
+
end
|
138
|
+
|
139
|
+
def content(bib)
|
140
|
+
case @format
|
141
|
+
when "yaml" then bib.to_hash.to_yaml
|
142
|
+
when "xml" then bib.to_xml bibdata: true
|
143
|
+
when "bibxml" then bib.to_bibxml
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module RelatonItu
|
2
|
+
module DataParserR
|
3
|
+
extend self
|
4
|
+
|
5
|
+
#
|
6
|
+
# Parse ITU-R document.
|
7
|
+
#
|
8
|
+
# @param [Mechanize::Page] doc mechanize page
|
9
|
+
# @param [String] url document url
|
10
|
+
# @param [String] type document type
|
11
|
+
#
|
12
|
+
# @return [RelatonItu::ItuBibliographicItem] bibliographic item
|
13
|
+
#
|
14
|
+
def parse(doc, url, type)
|
15
|
+
RelatonItu::ItuBibliographicItem.new(
|
16
|
+
docid: fetch_docid(doc), title: fetch_title(doc),
|
17
|
+
abstract: fetch_abstract(doc), date: fetch_date(doc), language: ["en"],
|
18
|
+
link: fetch_link(url), script: ["Latn"], docstatus: fetch_status(doc),
|
19
|
+
type: "standard", doctype: type
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
23
|
+
# @param doc [Mechanize::Page]
|
24
|
+
# @return [Array<RelatonBib::DocumentIdentifier>]
|
25
|
+
def fetch_docid(doc)
|
26
|
+
# id = doc.at('//h3[.="Number"]/parent::td/following-sibling::td[2]').text # .match(/^[^\s\(]+/).to_s
|
27
|
+
# %r{^(?<id1>[^\s\(\/]+(\/\d+)?)(\/(?<id2>\w+[^\s\(]+))?} =~ id
|
28
|
+
id = doc.at('//div[@id="idDocSetPropertiesWebPart"]/h2').text.match(/^R-\w+-([^-]+(?:-\d{1,3})?)/)[1]
|
29
|
+
[RelatonBib::DocumentIdentifier.new(type: "ITU", id: "ITU-R #{id}", primary: true)]
|
30
|
+
# docid << RelatonBib::DocumentIdentifier.new(type: 'ITU', id: id2) if id2
|
31
|
+
# docid
|
32
|
+
end
|
33
|
+
|
34
|
+
# @param doc [Mechanize::Page]
|
35
|
+
# @return [Array<RelatonBib::TypedTitleString>]
|
36
|
+
def fetch_title(doc)
|
37
|
+
content = doc.at('//h3[.="Title"]/parent::td/following-sibling::td[2]').text
|
38
|
+
[RelatonBib::TypedTitleString.new(type: "main", content: content, language: "en", script: "Latn")]
|
39
|
+
end
|
40
|
+
|
41
|
+
# @param doc [Mechanize::Page]
|
42
|
+
# @return [Array<RelatonBib::FormattedString>]
|
43
|
+
def fetch_abstract(doc)
|
44
|
+
doc.xpath('//h3[.="Observation"]/parent::td/following-sibling::td[2]').map do |a|
|
45
|
+
c = a.text.strip
|
46
|
+
RelatonBib::FormattedString.new content: c, language: "en", script: "Latn" unless c.empty?
|
47
|
+
end.compact
|
48
|
+
end
|
49
|
+
|
50
|
+
# @param doc [Mechanize::Page]
|
51
|
+
# @return [Array<RelatonBib::BibliographicDate>]
|
52
|
+
def fetch_date(doc)
|
53
|
+
dates = []
|
54
|
+
date = doc.at('//h3[.="Approval_Date"]/parent::td/following-sibling::td[2]',
|
55
|
+
'//h3[.="Approval date"]/parent::td/following-sibling::td[2]',
|
56
|
+
'//h3[.="Approval year"]/parent::td/following-sibling::td[2]')
|
57
|
+
dates << parse_date(date.text, "confirmed") if date
|
58
|
+
|
59
|
+
date = doc.at('//h3[.="Version year"]/parent::td/following-sibling::td[2]')
|
60
|
+
dates << parse_date(date.text, "updated") if date
|
61
|
+
date = doc.at('//div[@id="idDocSetPropertiesWebPart"]/h2').text.match(/(?<=-)(19|20)\d{2}/)
|
62
|
+
dates << parse_date(date.to_s, "published") if date
|
63
|
+
dates
|
64
|
+
end
|
65
|
+
|
66
|
+
# @param date [String]
|
67
|
+
# @param type [String]
|
68
|
+
# @return [RelatonBib::BibliographicDate]
|
69
|
+
def parse_date(date, type)
|
70
|
+
d = case date
|
71
|
+
# when /^\d{4}$/ then date
|
72
|
+
when /(\d{4})(\d{2})/ then "#{$1}-#{$2}"
|
73
|
+
when %r{(\d{1,2})/(\d{1,2})/(\d{4})} then "#{$3}-#{$1}-#{$2}"
|
74
|
+
else date
|
75
|
+
end
|
76
|
+
RelatonBib::BibliographicDate.new(type: type, on: d)
|
77
|
+
end
|
78
|
+
|
79
|
+
# @param url [String]
|
80
|
+
# @return [Array<RelatonBib::TypedUri>]
|
81
|
+
def fetch_link(url)
|
82
|
+
[RelatonBib::TypedUri.new(type: "src", content: url)]
|
83
|
+
end
|
84
|
+
|
85
|
+
# @param doc [Mechanize::Page]
|
86
|
+
# @return [RelatonBib::DocumentStatus, nil]
|
87
|
+
def fetch_status(doc)
|
88
|
+
s = doc.at('//h3[.="Status"]/parent::td/following-sibling::td[2]')
|
89
|
+
return unless s
|
90
|
+
|
91
|
+
RelatonBib::DocumentStatus.new stage: s.text
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/relaton_itu/hit.rb
CHANGED
@@ -8,6 +8,8 @@ module RelatonItu
|
|
8
8
|
# Page of hit collection.
|
9
9
|
class HitCollection < RelatonBib::HitCollection
|
10
10
|
DOMAIN = "https://www.itu.int"
|
11
|
+
GH_ITU_R = "https://raw.githubusercontent.com/relaton/relaton-data-itu-r/master/"
|
12
|
+
INDEX_FILE = "index-v1.yaml"
|
11
13
|
|
12
14
|
# @return [TrueClass, FalseClass]
|
13
15
|
attr_reader :gi_imp
|
@@ -23,6 +25,7 @@ module RelatonItu
|
|
23
25
|
@agent = Mechanize.new
|
24
26
|
agent.user_agent_alias = "Mac Safari"
|
25
27
|
@gi_imp = /\.Imp\d/.match?(ref)
|
28
|
+
@array = []
|
26
29
|
|
27
30
|
case ref
|
28
31
|
when /^(ITU-T|ITU-R\sRR)/
|
@@ -35,26 +38,25 @@ module RelatonItu
|
|
35
38
|
private
|
36
39
|
|
37
40
|
def request_search
|
38
|
-
url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/
|
41
|
+
url = "#{DOMAIN}/net4/ITU-T/search/GlobalSearch/RunSearch"
|
39
42
|
data = { json: params.to_json }
|
40
|
-
resp = agent.post url, data
|
43
|
+
resp = agent.post url, data
|
41
44
|
@array = hits JSON.parse(resp.body)
|
42
45
|
end
|
43
46
|
|
44
47
|
# @param ref [String] a document ref
|
45
|
-
def request_document(ref) # rubocop:todo Metrics/MethodLength
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
def request_document(ref) # rubocop:todo Metrics/MethodLength, Metrics/AbcSize
|
49
|
+
index = Relaton::Index.find_or_create :itu, url: "#{GH_ITU_R}index-v1.zip", file: INDEX_FILE
|
50
|
+
row = index.search(ref).min_by { |i| i[:id] }
|
51
|
+
return unless row
|
52
|
+
|
53
|
+
uri = URI("#{GH_ITU_R}#{row[:file]}")
|
50
54
|
resp = Net::HTTP.get_response(uri)
|
51
|
-
if resp.code == "404"
|
52
|
-
@array = []
|
53
|
-
return
|
54
|
-
end
|
55
|
+
return if resp.code == "404"
|
55
56
|
|
56
57
|
hash = YAML.safe_load resp.body
|
57
58
|
item_hash = HashConverter.hash_to_bib(hash)
|
59
|
+
item_hash[:fetched] = Date.today.to_s
|
58
60
|
item = ItuBibliographicItem.new(**item_hash)
|
59
61
|
hit = Hit.new({ url: uri.to_s }, self)
|
60
62
|
hit.fetch = item
|
@@ -88,7 +90,7 @@ module RelatonItu
|
|
88
90
|
"Selected" => false,
|
89
91
|
"Value" => "",
|
90
92
|
"Label" => "Name",
|
91
|
-
"Target" => "
|
93
|
+
"Target" => "/name_s",
|
92
94
|
"TypeName" => "CHECKBOX",
|
93
95
|
"GetCriteriaType" => 0,
|
94
96
|
},
|
@@ -96,7 +98,7 @@ module RelatonItu
|
|
96
98
|
"Selected" => false,
|
97
99
|
"Value" => "",
|
98
100
|
"Label" => "Short description",
|
99
|
-
"Target" => "
|
101
|
+
"Target" => "/short_description_s",
|
100
102
|
"TypeName" => "CHECKBOX",
|
101
103
|
"GetCriteriaType" => 0,
|
102
104
|
},
|
@@ -104,7 +106,7 @@ module RelatonItu
|
|
104
106
|
"Selected" => false,
|
105
107
|
"Value" => "",
|
106
108
|
"Label" => "File content",
|
107
|
-
"Target" => "
|
109
|
+
"Target" => "/file",
|
108
110
|
"TypeName" => "CHECKBOX",
|
109
111
|
"GetCriteriaType" => 0,
|
110
112
|
},
|
@@ -125,7 +127,7 @@ module RelatonItu
|
|
125
127
|
data["results"].map do |h|
|
126
128
|
code = h["Media"]["Name"]
|
127
129
|
title = h["Title"]
|
128
|
-
url = h[
|
130
|
+
url = "#{DOMAIN}#{h['Redirection']}"
|
129
131
|
type = h["Collection"]["Group"].downcase[0...-1]
|
130
132
|
Hit.new({ code: code, title: title, url: url, type: type }, self)
|
131
133
|
end
|
@@ -59,19 +59,19 @@ module RelatonItu
|
|
59
59
|
|
60
60
|
def fetch_ref_err(code, year, missed_years) # rubocop:disable Metrics/MethodLength
|
61
61
|
id = year ? "#{code}:#{year}" : code
|
62
|
-
warn "[relaton-itu] WARNING: no match found online for #{id}. "\
|
63
|
-
|
62
|
+
warn "[relaton-itu] WARNING: no match found online for #{id}. " \
|
63
|
+
"The code must be exactly like it is on the standards website."
|
64
64
|
unless missed_years.empty?
|
65
|
-
warn "[relaton-itu] (There was no match for #{year}, though there "\
|
66
|
-
|
65
|
+
warn "[relaton-itu] (There was no match for #{year}, though there " \
|
66
|
+
"were matches found for #{missed_years.join(', ')}.)"
|
67
67
|
end
|
68
68
|
if /\d-\d/.match? code
|
69
|
-
warn "[relaton-itu] The provided document part may not exist, or "\
|
70
|
-
|
69
|
+
warn "[relaton-itu] The provided document part may not exist, or " \
|
70
|
+
"the document may no longer be published in parts."
|
71
71
|
else
|
72
|
-
warn "[relaton-itu] If you wanted to cite all document parts for the reference, "\
|
73
|
-
|
74
|
-
|
72
|
+
warn "[relaton-itu] If you wanted to cite all document parts for the reference, " \
|
73
|
+
"use \"#{code} (all parts)\".\nIf the document is not a standard, " \
|
74
|
+
"use its document type abbreviation (TS, TR, PAS, Guide)."
|
75
75
|
end
|
76
76
|
nil
|
77
77
|
end
|
@@ -7,6 +7,7 @@ module RelatonItu
|
|
7
7
|
@prefix = "ITU"
|
8
8
|
@defaultprefix = %r{^ITU\s}
|
9
9
|
@idtype = "ITU"
|
10
|
+
@datasets = %w[itu-r]
|
10
11
|
end
|
11
12
|
|
12
13
|
# @param code [String]
|
@@ -17,6 +18,18 @@ module RelatonItu
|
|
17
18
|
::RelatonItu::ItuBibliography.get(code, date, opts)
|
18
19
|
end
|
19
20
|
|
21
|
+
#
|
22
|
+
# Fetch all the documents from https://extranet.itu.int/brdocsearch/
|
23
|
+
#
|
24
|
+
# @param [String] source source name (itu-r)
|
25
|
+
# @param [Hash] opts
|
26
|
+
# @option opts [String] :output directory to output documents, default is data
|
27
|
+
# @option opts [String] :format output format, default is yaml
|
28
|
+
#
|
29
|
+
def fetch_data(_source, opts)
|
30
|
+
DataFetcher.fetch(**opts)
|
31
|
+
end
|
32
|
+
|
20
33
|
# @param xml [String]
|
21
34
|
# @return [RelatonItu::ItuBibliographicItem]
|
22
35
|
def from_xml(xml)
|
@@ -34,5 +47,12 @@ module RelatonItu
|
|
34
47
|
def grammar_hash
|
35
48
|
@grammar_hash ||= ::RelatonItu.grammar_hash
|
36
49
|
end
|
50
|
+
|
51
|
+
#
|
52
|
+
# Remove index file
|
53
|
+
#
|
54
|
+
def remove_index_file
|
55
|
+
Relaton::Index.find_or_create(:itu, url: true, file: HitCollection::INDEX_FILE).remove_file
|
56
|
+
end
|
37
57
|
end
|
38
58
|
end
|
data/lib/relaton_itu/scrapper.rb
CHANGED
@@ -27,7 +27,7 @@ module RelatonItu
|
|
27
27
|
# Parse page.
|
28
28
|
# @param hit [RelatonItu::Hit]
|
29
29
|
# @return [Hash]
|
30
|
-
def parse_page(hit, imp
|
30
|
+
def parse_page(hit, imp: false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
31
31
|
doc = get_page hit
|
32
32
|
return unless doc.code == "200"
|
33
33
|
|
@@ -190,7 +190,7 @@ module RelatonItu
|
|
190
190
|
did = RelatonBib::DocumentIdentifier.new(id: ref.text, type: "ITU")
|
191
191
|
bibitem = ItuBibliographicItem.new(formattedref: fref, docid: [did],
|
192
192
|
type: "standard")
|
193
|
-
{ type: "
|
193
|
+
{ type: "complementOf", bibitem: bibitem }
|
194
194
|
end
|
195
195
|
end
|
196
196
|
|
data/lib/relaton_itu/version.rb
CHANGED
data/lib/relaton_itu.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
require "mechanize"
|
2
|
+
require "digest/md5"
|
3
|
+
require "relaton/index"
|
2
4
|
require "relaton_itu/version"
|
3
5
|
require "relaton_itu/itu_bibliography"
|
4
|
-
require "
|
6
|
+
require "relaton_itu/data_fetcher"
|
7
|
+
require "relaton_itu/data_parser_r"
|
5
8
|
|
6
9
|
module RelatonItu
|
7
10
|
class Error < StandardError; end
|
data/relaton-itu.gemspec
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path("../lib", __FILE__)
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require "relaton_itu/version"
|
5
4
|
|
@@ -9,10 +8,10 @@ Gem::Specification.new do |spec|
|
|
9
8
|
spec.authors = ["Ribose Inc."]
|
10
9
|
spec.email = ["open.source@ribose.com"]
|
11
10
|
|
12
|
-
spec.summary = "RelatonItu: retrieve ITU Standards for bibliographic
|
13
|
-
"using the BibliographicItem model"
|
14
|
-
spec.description = "RelatonItu: retrieve ITU Standards for bibliographic
|
15
|
-
"using the BibliographicItem model"
|
11
|
+
spec.summary = "RelatonItu: retrieve ITU Standards for bibliographic " \
|
12
|
+
"use using the BibliographicItem model"
|
13
|
+
spec.description = "RelatonItu: retrieve ITU Standards for bibliographic " \
|
14
|
+
"use using the BibliographicItem model"
|
16
15
|
spec.homepage = "https://github.com/metanorma/relaton-itu"
|
17
16
|
spec.license = "MIT"
|
18
17
|
|
@@ -24,17 +23,9 @@ Gem::Specification.new do |spec|
|
|
24
23
|
spec.bindir = "exe"
|
25
24
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
26
25
|
spec.require_paths = ["lib"]
|
27
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
28
|
-
|
29
|
-
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
30
|
-
spec.add_development_dependency "pry-byebug"
|
31
|
-
spec.add_development_dependency "rake", "~> 13.0"
|
32
|
-
spec.add_development_dependency "rspec", "~> 3.0"
|
33
|
-
spec.add_development_dependency "ruby-jing"
|
34
|
-
spec.add_development_dependency "simplecov"
|
35
|
-
spec.add_development_dependency "vcr", "~> 5.0.0"
|
36
|
-
spec.add_development_dependency "webmock"
|
26
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
|
37
27
|
|
38
28
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
39
29
|
spec.add_dependency "relaton-bib", "~> 1.14.0"
|
30
|
+
spec.add_dependency "relaton-index", "~> 0.2.0"
|
40
31
|
end
|
metadata
CHANGED
@@ -1,155 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-itu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.6'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.6'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: pry-byebug
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '13.0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '13.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '3.0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '3.0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: ruby-jing
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: simplecov
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: vcr
|
14
|
+
name: mechanize
|
99
15
|
requirement: !ruby/object:Gem::Requirement
|
100
16
|
requirements:
|
101
17
|
- - "~>"
|
102
18
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
104
|
-
type: :
|
19
|
+
version: 2.8.0
|
20
|
+
type: :runtime
|
105
21
|
prerelease: false
|
106
22
|
version_requirements: !ruby/object:Gem::Requirement
|
107
23
|
requirements:
|
108
24
|
- - "~>"
|
109
25
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: webmock
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - ">="
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - ">="
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '0'
|
26
|
+
version: 2.8.0
|
125
27
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
28
|
+
name: relaton-bib
|
127
29
|
requirement: !ruby/object:Gem::Requirement
|
128
30
|
requirements:
|
129
31
|
- - "~>"
|
130
32
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
33
|
+
version: 1.14.0
|
132
34
|
type: :runtime
|
133
35
|
prerelease: false
|
134
36
|
version_requirements: !ruby/object:Gem::Requirement
|
135
37
|
requirements:
|
136
38
|
- - "~>"
|
137
39
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
40
|
+
version: 1.14.0
|
139
41
|
- !ruby/object:Gem::Dependency
|
140
|
-
name: relaton-
|
42
|
+
name: relaton-index
|
141
43
|
requirement: !ruby/object:Gem::Requirement
|
142
44
|
requirements:
|
143
45
|
- - "~>"
|
144
46
|
- !ruby/object:Gem::Version
|
145
|
-
version:
|
47
|
+
version: 0.2.0
|
146
48
|
type: :runtime
|
147
49
|
prerelease: false
|
148
50
|
version_requirements: !ruby/object:Gem::Requirement
|
149
51
|
requirements:
|
150
52
|
- - "~>"
|
151
53
|
- !ruby/object:Gem::Version
|
152
|
-
version:
|
54
|
+
version: 0.2.0
|
153
55
|
description: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
|
154
56
|
model'
|
155
57
|
email:
|
@@ -176,6 +78,8 @@ files:
|
|
176
78
|
- grammars/relaton-itu-compile.rng
|
177
79
|
- grammars/relaton-itu.rng
|
178
80
|
- lib/relaton_itu.rb
|
81
|
+
- lib/relaton_itu/data_fetcher.rb
|
82
|
+
- lib/relaton_itu/data_parser_r.rb
|
179
83
|
- lib/relaton_itu/editorial_group.rb
|
180
84
|
- lib/relaton_itu/hash_converter.rb
|
181
85
|
- lib/relaton_itu/hit.rb
|
@@ -201,14 +105,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
201
105
|
requirements:
|
202
106
|
- - ">="
|
203
107
|
- !ruby/object:Gem::Version
|
204
|
-
version: 2.
|
108
|
+
version: 2.7.0
|
205
109
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
206
110
|
requirements:
|
207
111
|
- - ">="
|
208
112
|
- !ruby/object:Gem::Version
|
209
113
|
version: '0'
|
210
114
|
requirements: []
|
211
|
-
rubygems_version: 3.
|
115
|
+
rubygems_version: 3.3.26
|
212
116
|
signing_key:
|
213
117
|
specification_version: 4
|
214
118
|
summary: 'RelatonItu: retrieve ITU Standards for bibliographic use using the BibliographicItem
|