relaton-bipm 1.14.1 → 1.14.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +6 -0
- data/README.adoc +32 -12
- data/grammars/basicdoc.rng +0 -1
- data/grammars/biblio.rng +12 -2
- data/lib/relaton_bipm/bipm_bibliography.rb +12 -310
- data/lib/relaton_bipm/bipm_si_brochure_parser.rb +8 -4
- data/lib/relaton_bipm/comment_periond.rb +1 -1
- data/lib/relaton_bipm/data_fetcher.rb +17 -5
- data/lib/relaton_bipm/data_outcomes_parser.rb +68 -29
- data/lib/relaton_bipm/id_parser.rb +134 -0
- data/lib/relaton_bipm/processor.rb +5 -4
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +311 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +176 -0
- data/lib/relaton_bipm/version.rb +1 -1
- data/lib/relaton_bipm.rb +5 -1
- data/relaton_bipm.gemspec +2 -6
- metadata +26 -80
- data/lib/relaton_bipm/index.rb +0 -68
@@ -0,0 +1,176 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "weakref"

module RelatonBipm
  module RawdataBipmMetrologia
    #
    # Builds bibliographic items from a local rawdata-bipm-metrologia checkout
    # and passes them to a data fetcher, which indexes and writes them.
    #
    # Four kinds of documents are produced: the journal root, one per volume
    # directory, one per issue directory and one per article XML file.
    #
    class Fetcher
      # Glob, relative to the working directory, matching the journal
      # directory inside every "*content" directory of the raw data.
      DIR = "rawdata-bipm-metrologia/data/*content/0026-1394"

      #
      # Shortcut for `new(data_fetcher).fetch`.
      #
      # @param data_fetcher [RelatonBipm::DataFetcher]
      #
      def self.fetch(data_fetcher)
        new(data_fetcher).fetch
      end

      #
      # @param data_fetcher [RelatonBipm::DataFetcher] object that provides
      #   `ext`, `output`, `index`, `index_new`, `index2` and `write_file`
      #
      def initialize(data_fetcher)
        # Held through a WeakRef, so this object does not keep the data
        # fetcher alive on its own.
        @data_fetcher = WeakRef.new data_fetcher
      end

      #
      # Fetch documents from rawdata-bipm-metrologia and save to files
      #
      def fetch
        fetch_metrologia
        fetch_volumes
        fetch_issues
        fetch_articles
      end

      #
      # Fetch articles from rawdata-bipm-metrologia and save to files
      #
      # Every XML file below DIR is parsed and its "/article" element is
      # handed to ArticleParser. Paths are sorted so the processing order
      # does not depend on the Ruby version (Dir[] sorts by default only
      # since Ruby 3.0).
      #
      def fetch_articles
        Dir["#{DIR}/**/*.xml"].sort.each do |path|
          doc = Nokogiri::XML File.read(path, encoding: "UTF-8")
          item = ArticleParser.parse doc.at("/article")
          save_item item.docidentifier.first.id, item
        end
      end

      #
      # Fetch volumes from rawdata-bipm-metrologia and save to files
      #
      # A volume directory present under several "*content" directories is
      # processed once.
      #
      def fetch_volumes
        child_names.each { |volume| fetch_metrologia volume }
      end

      #
      # Fetch issues from rawdata-bipm-metrologia and save to files
      #
      # As in #fetch_volumes, a volume/issue pair present under several
      # "*content" directories is processed once.
      #
      def fetch_issues
        pairs = Dir["#{DIR}/*/*"].map { |path| path.split("/").last(2) }
        pairs.uniq.sort.each { |volume, issue| fetch_metrologia volume, issue }
      end

      #
      # Fetch metrologia root, volume or issue document from
      # rawdata-bipm-metrologia and save it to a file
      #
      # @overload fetch_metrologia(volume, issue)
      #   @param [String] volume volume directory name
      #   @param [String] issue issue directory name
      # @overload fetch_metrologia(volume)
      #   @param [String] volume volume directory name
      # @overload fetch_metrologia
      #
      def fetch_metrologia(*args)
        id = identifier(*args)
        item = BipmBibliographicItem.new(
          type: "article", formattedref: formattedref(id), docid: docidentifier(id),
          language: ["en"], script: ["Latn"], relation: relation(*args),
          link: typed_uri(*args)
        )
        save_item id, item
      end

      #
      # Create formattedref
      #
      # @param [String] id document identifier
      #
      # @return [RelatonBib::FormattedRef] formattedref
      #
      def formattedref(id)
        RelatonBib::FormattedRef.new content: id, language: "en", script: "Latn"
      end

      #
      # Create docidentifier
      #
      # @param [String] id document identifier
      #
      # @return [Array<RelatonBib::DocumentIdentifier>] docidentifier
      #
      def docidentifier(id)
        [RelatonBib::DocumentIdentifier.new(id: id, type: "BIPM", primary: true)]
      end

      #
      # Create identifier: "Metrologia" followed by the numeric tail of each
      # given directory name, separated by spaces
      #
      # @overload identifier(volume, issue, article)
      #   @param [String] volume volume directory name
      #   @param [String] issue issue directory name
      #   @param [String] article article directory name
      # @overload identifier(volume, issue)
      #   @param [String] volume volume directory name
      #   @param [String] issue issue directory name
      # @overload identifier(volume)
      #   @param [String] volume volume directory name
      #
      # @return [String] document identifier
      #
      def identifier(*args)
        ["Metrologia", *id_parts(*args)].join(" ")
      end

      #
      # Reduce each name to the text after its last underscore
      #
      # @param [Array<String>] args directory names
      #
      # @return [Array<String>] trailing parts; a name ending in "_" gives ""
      #
      def id_parts(*args)
        args.map { |name| name[/[^_]+$/].to_s }
      end

      #
      # Fetch relations
      #
      # One relation is created for each distinct entry found directly below
      # the directory addressed by the arguments. Entries are de-duplicated
      # because DIR may match several "*content" directories, and sorted so
      # the result is stable.
      #
      # NOTE(review): children are attached with type "partOf" - confirm that
      # this is the intended direction of the relation.
      #
      # @param (see #fetch_metrologia)
      #
      # @return [Array<RelatonBib::DocumentRelation>] relations
      #
      def relation(*args)
        child_names(*args).map do |part|
          id = identifier(*args, part)
          RelatonBib::DocumentRelation.new(type: "partOf", bibitem: rel_bibitem(id))
        end
      end

      #
      # Create relation bibitem
      #
      # @param [String] id document identifier
      #
      # @return [RelatonBipm::BipmBibliographicItem] bibitem
      #
      def rel_bibitem(id)
        BipmBibliographicItem.new(
          formattedref: formattedref(id), docid: docidentifier(id),
        )
      end

      #
      # Wrap the document link into a single-element list of typed URIs
      #
      # @param (see #fetch_metrologia)
      #
      # @return [Array<RelatonBib::TypedUri>] link of type "src"
      #
      def typed_uri(*args)
        [RelatonBib::TypedUri.new(type: "src", content: link(*args))]
      end

      #
      # Build the iopscience.iop.org URL of the journal, a volume or an issue
      #
      # @param (see #fetch_metrologia)
      #
      # @return [String, nil] URL, or nil when more than two arguments are given
      #
      def link(*args)
        params = id_parts(*args).join("/")
        case args.size
        when 0 then "https://iopscience.iop.org/journal/0026-1394"
        when 1 then "https://iopscience.iop.org/volume/0026-1394/#{params}"
        when 2 then "https://iopscience.iop.org/issue/0026-1394/#{params}"
        end
      end

      private

      #
      # Distinct, sorted names of the entries directly below DIR/args
      #
      # @param [Array<String>] args directory names leading to the parent
      #
      # @return [Array<String>] entry names
      #
      def child_names(*args)
        Dir[File.join(DIR, *args, "*")].map { |path| File.basename(path) }.uniq.sort
      end

      #
      # Register a document in all indexes of the data fetcher and write it
      #
      # The file name is the lower-cased identifier with spaces replaced by
      # dashes, plus the data fetcher's extension.
      #
      # @param [String] id document identifier
      # @param [RelatonBipm::BipmBibliographicItem] item document to write
      #
      def save_item(id, item)
        file = "#{id.downcase.tr(' ', '-')}.#{@data_fetcher.ext}"
        path = File.join(@data_fetcher.output, file)
        @data_fetcher.index[[id]] = path
        @data_fetcher.index_new.add_or_update [id], path
        @data_fetcher.index2.add_or_update Id.new(id).normalized_hash, path
        @data_fetcher.write_file path, item
      end
    end
  end
end
|
data/lib/relaton_bipm/version.rb
CHANGED
data/lib/relaton_bipm.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require "zip"
|
2
2
|
require "fileutils"
|
3
|
+
require "parslet"
|
3
4
|
require "relaton_bib"
|
5
|
+
require "relaton/index"
|
6
|
+
require "relaton_bipm/id_parser"
|
4
7
|
require "relaton_bipm/version"
|
5
8
|
require "relaton_bipm/editorial_group"
|
6
9
|
require "relaton_bipm/committee"
|
@@ -13,10 +16,11 @@ require "relaton_bipm/bipm_bibliographic_item"
|
|
13
16
|
require "relaton_bipm/bipm_bibliography"
|
14
17
|
require "relaton_bipm/hash_converter"
|
15
18
|
require "relaton_bipm/xml_parser"
|
16
|
-
require "relaton_bipm/index"
|
17
19
|
require "relaton_bipm/data_fetcher"
|
18
20
|
require "relaton_bipm/data_outcomes_parser"
|
19
21
|
require "relaton_bipm/bipm_si_brochure_parser"
|
22
|
+
require "relaton_bipm/rawdata_bipm_metrologia/fetcher"
|
23
|
+
require "relaton_bipm/rawdata_bipm_metrologia/article_parser"
|
20
24
|
|
21
25
|
module RelatonBipm
|
22
26
|
class Error < StandardError; end
|
data/relaton_bipm.gemspec
CHANGED
@@ -32,17 +32,13 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
|
-
spec.add_development_dependency "byebug"
|
36
35
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
37
|
-
spec.add_development_dependency "pry-byebug"
|
38
|
-
spec.add_development_dependency "ruby-jing"
|
39
|
-
spec.add_development_dependency "simplecov"
|
40
|
-
spec.add_development_dependency "vcr"
|
41
|
-
spec.add_development_dependency "webmock"
|
42
36
|
|
43
37
|
spec.add_dependency "faraday", "~> 1.0"
|
44
38
|
spec.add_dependency "mechanize", "~> 2.8.0"
|
39
|
+
spec.add_dependency "parslet", "~> 2.0.0"
|
45
40
|
spec.add_dependency "relaton-bib", "~> 1.14.0"
|
41
|
+
spec.add_dependency "relaton-index", "~> 0.1.0"
|
46
42
|
spec.add_dependency "rubyzip", "~> 2.3.0"
|
47
43
|
spec.add_dependency "serrano", "~> 1.0"
|
48
44
|
end
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bipm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.1
|
4
|
+
version: 1.14.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: byebug
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: equivalent-xml
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,117 +25,75 @@ dependencies:
|
|
39
25
|
- !ruby/object:Gem::Version
|
40
26
|
version: '0.6'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: pry-byebug
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: ruby-jing
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: simplecov
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: vcr
|
28
|
+
name: faraday
|
85
29
|
requirement: !ruby/object:Gem::Requirement
|
86
30
|
requirements:
|
87
|
-
- - "
|
31
|
+
- - "~>"
|
88
32
|
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
91
35
|
prerelease: false
|
92
36
|
version_requirements: !ruby/object:Gem::Requirement
|
93
37
|
requirements:
|
94
|
-
- - "
|
38
|
+
- - "~>"
|
95
39
|
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
40
|
+
version: '1.0'
|
97
41
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
42
|
+
name: mechanize
|
99
43
|
requirement: !ruby/object:Gem::Requirement
|
100
44
|
requirements:
|
101
|
-
- - "
|
45
|
+
- - "~>"
|
102
46
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
104
|
-
type: :
|
47
|
+
version: 2.8.0
|
48
|
+
type: :runtime
|
105
49
|
prerelease: false
|
106
50
|
version_requirements: !ruby/object:Gem::Requirement
|
107
51
|
requirements:
|
108
|
-
- - "
|
52
|
+
- - "~>"
|
109
53
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
54
|
+
version: 2.8.0
|
111
55
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
56
|
+
name: parslet
|
113
57
|
requirement: !ruby/object:Gem::Requirement
|
114
58
|
requirements:
|
115
59
|
- - "~>"
|
116
60
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
61
|
+
version: 2.0.0
|
118
62
|
type: :runtime
|
119
63
|
prerelease: false
|
120
64
|
version_requirements: !ruby/object:Gem::Requirement
|
121
65
|
requirements:
|
122
66
|
- - "~>"
|
123
67
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
68
|
+
version: 2.0.0
|
125
69
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
70
|
+
name: relaton-bib
|
127
71
|
requirement: !ruby/object:Gem::Requirement
|
128
72
|
requirements:
|
129
73
|
- - "~>"
|
130
74
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
75
|
+
version: 1.14.0
|
132
76
|
type: :runtime
|
133
77
|
prerelease: false
|
134
78
|
version_requirements: !ruby/object:Gem::Requirement
|
135
79
|
requirements:
|
136
80
|
- - "~>"
|
137
81
|
- !ruby/object:Gem::Version
|
138
|
-
version:
|
82
|
+
version: 1.14.0
|
139
83
|
- !ruby/object:Gem::Dependency
|
140
|
-
name: relaton-
|
84
|
+
name: relaton-index
|
141
85
|
requirement: !ruby/object:Gem::Requirement
|
142
86
|
requirements:
|
143
87
|
- - "~>"
|
144
88
|
- !ruby/object:Gem::Version
|
145
|
-
version: 1.
|
89
|
+
version: 0.1.0
|
146
90
|
type: :runtime
|
147
91
|
prerelease: false
|
148
92
|
version_requirements: !ruby/object:Gem::Requirement
|
149
93
|
requirements:
|
150
94
|
- - "~>"
|
151
95
|
- !ruby/object:Gem::Version
|
152
|
-
version: 1.
|
96
|
+
version: 0.1.0
|
153
97
|
- !ruby/object:Gem::Dependency
|
154
98
|
name: rubyzip
|
155
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -216,8 +160,10 @@ files:
|
|
216
160
|
- lib/relaton_bipm/document_relation.rb
|
217
161
|
- lib/relaton_bipm/editorial_group.rb
|
218
162
|
- lib/relaton_bipm/hash_converter.rb
|
219
|
-
- lib/relaton_bipm/index.rb
|
163
|
+
- lib/relaton_bipm/id_parser.rb
|
220
164
|
- lib/relaton_bipm/processor.rb
|
165
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
|
166
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
|
221
167
|
- lib/relaton_bipm/structured_identifier.rb
|
222
168
|
- lib/relaton_bipm/version.rb
|
223
169
|
- lib/relaton_bipm/workgroup.rb
|
data/lib/relaton_bipm/index.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
module RelatonBipm
  #
  # Index mapping lists of references to document file paths.
  #
  # The index is read from a YAML file in the user's home directory; when
  # that file is missing or was last changed before today, it is downloaded
  # from GitHub and saved to the file.
  #
  class Index
    #
    # Initialize index
    #
    def initialize
      read_index_file || get_index_from_gh
    end

    #
    # Search index entry
    #
    # @param [String] ref reference
    #
    # @return [String, nil] path to document file of the first entry whose
    #   key includes the reference, or nil when there is none
    #
    def search(ref)
      @index.find { |key, _| key.include? ref }&.last
    end

    private

    #
    # Create dir if need and return path to index file
    #
    # @return [String] path to index file
    #
    def path
      @path ||= begin
        dir = File.join Dir.home, ".relaton", "bipm"
        FileUtils.mkdir_p dir
        File.join dir, "index.yaml"
      end
    end

    #
    # Read index from file if it exists and not outdated
    #
    # @return [Hash, nil] index content, nil when the file is absent or its
    #   ctime is earlier than today
    #
    def read_index_file
      return if !File.exist?(path) || File.ctime(path).to_date < Date.today

      @index = RelatonBib.parse_yaml File.read(path, encoding: "UTF-8")
    end

    #
    # Save index to file
    #
    # @return [Integer] number of bytes written
    #
    def save_index_file
      File.write path, @index.to_yaml, encoding: "UTF-8"
    end

    #
    # Get index from a GitHub repository and save it to the index file
    #
    # The download is opened with a block so the underlying stream is closed
    # once the first zip entry has been read.
    #
    # @return [Integer] number of bytes written to the index file
    #
    def get_index_from_gh
      URI("#{BipmBibliography::GH_ENDPOINT}index.zip").open do |io|
        entry = Zip::InputStream.new(io).get_next_entry
        # Read while the download is still open.
        @index = RelatonBib.parse_yaml entry.get_input_stream.read
      end
      save_index_file
    end
  end
end
|