relaton 2.1.0 → 2.2.0.pre.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +1 -1
  3. data/CLAUDE.md +8 -0
  4. data/Gemfile +8 -0
  5. data/lib/relaton/db/cache.rb +232 -0
  6. data/lib/relaton/db/config.rb +24 -0
  7. data/lib/relaton/db/registry.rb +119 -0
  8. data/lib/relaton/db/util.rb +9 -0
  9. data/lib/relaton/db/version.rb +5 -0
  10. data/lib/relaton/db/workers_pool.rb +22 -0
  11. data/lib/relaton/db.rb +21 -206
  12. data/lib/relaton/version.rb +1 -1
  13. data/lib/relaton.rb +2 -10
  14. data/relaton.gemspec +47 -36
  15. data/spec/relaton/config_spec.rb +1 -1
  16. data/spec/relaton/db_cache_spec.rb +4 -4
  17. data/spec/relaton/db_spec.rb +18 -7
  18. data/spec/relaton/registry_spec.rb +39 -39
  19. data/spec/relaton/util_spec.rb +1 -1
  20. data/spec/relaton_meta_spec.rb +25 -0
  21. data/spec/relaton_spec.rb +163 -88
  22. data/spec/spec_helper.rb +2 -2
  23. data/spec/vcr_cassetes/api_relaton_org.yml +8 -8
  24. data/spec/vcr_cassetes/api_relaton_org_unavailable.yml +76 -79
  25. data/spec/vcr_cassetes/cc_dir_10005_2019.yml +80 -77
  26. data/spec/vcr_cassetes/cipm_meeting_43.yml +1372 -1380
  27. data/spec/vcr_cassetes/gb_t_20223_2006.yml +500 -504
  28. data/spec/vcr_cassetes/iso_19115_1.yml +17204 -13047
  29. data/spec/vcr_cassetes/iso_19115_1_2.yml +193 -240
  30. data/spec/vcr_cassetes/iso_19115_1_std.yml +17207 -13050
  31. data/spec/vcr_cassetes/iso_19115_all_parts.yml +116 -160
  32. data/spec/vcr_cassetes/iso_19133_2005.yml +78 -82
  33. data/spec/vcr_cassetes/iso_combined_applied.yml +186 -232
  34. data/spec/vcr_cassetes/iso_combined_included.yml +187 -233
  35. data/spec/vcr_cassetes/ogc_19_025r1.yml +211 -208
  36. data/spec/vcr_cassetes/omg_ami4ccm_1_0.yml +246 -246
  37. data/spec/vcr_cassetes/rfc_8341.yml +1026 -1020
  38. metadata +133 -78
  39. data/.rubocop.yml +0 -12
  40. data/lib/relaton/config.rb +0 -24
  41. data/lib/relaton/db_cache.rb +0 -230
  42. data/lib/relaton/registry.rb +0 -118
  43. data/lib/relaton/util.rb +0 -7
  44. data/lib/relaton/workers_pool.rb +0 -21
  45. data/spec/vcr_cassetes/3gpp_tr_00_01u_umts_3_0_0.yml +0 -12720
  46. data/spec/vcr_cassetes/cie_001_1980.yml +0 -401
  47. data/spec/vcr_cassetes/doi_10_6028_nist_ir_8245.yml +0 -86
  48. data/spec/vcr_cassetes/ecma_6.yml +0 -112
  49. data/spec/vcr_cassetes/en_10160_1999.yml +0 -13624
  50. data/spec/vcr_cassetes/ieee_528_2019.yml +0 -2786
  51. data/spec/vcr_cassetes/iso_dis.yml +0 -157
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5774763b35647935239061ed5da1c618f8c2c84b31adbf83afb56aa7a920da10
4
- data.tar.gz: 612177a17fbbca7af8d5a32d5a408ef82e6ce7a00a1f37e9c6e6302976d6dd70
3
+ metadata.gz: 7836d4ee81be911e0bf0a95a3b9154895cfef800e918443a54ed73d89623b268
4
+ data.tar.gz: ac550984b45d4c4b558a277c23c4a3163903bdb070760eee2395f070b75b8980
5
5
  SHA512:
6
- metadata.gz: 3284e5fc96ed6d0b4035c093d8bef53dcda4225a9477fb137610829f6676674702020541cba00ba8cc41dafcd6548eb8eafbcf6fc2e6a800b1ba844d38e71e91
7
- data.tar.gz: 97acc11738d7b016522b9b145e056c3b9fe934307a94e3d83ea5d78c2259a55260f94c49dcd5fb928bf184373fa79a37708900326323c29fc4388522da7fa88b
6
+ metadata.gz: 7fd1668edf1c684c26e3cbf3d2177654b1b48d103290101e457a9811c49dcf6dbb85fe48750985527c545f5a3e30b627a2bff5c2d39ad87e28358528d434c891
7
+ data.tar.gz: fef35f22ec455e5c1094ba2016355b64f0aaa60fb9ce569dc4e4a19ea7c8c3aa0817ea94dd7ba74c039829ecf18f6d01f4541c59734c0183a63c8b0c7632d204
@@ -4,7 +4,7 @@ name: rake
4
4
 
5
5
  on:
6
6
  push:
7
- branches: [ master, main ]
7
+ branches: [ master, main, lutaml-integration ]
8
8
  tags: [ v* ]
9
9
  pull_request:
10
10
  workflow_dispatch:
data/CLAUDE.md CHANGED
@@ -63,6 +63,14 @@ Thread pool for `fetch_async`. Default 10 threads per processor, overridable via
63
63
  - Tests create `testcache`/`testcache2` directories and clean them in `before(:each)`
64
64
  - Cache-related tests need `<fetched>` elements in XML for `valid_entry?` to return true
65
65
  - Integration tests in `spec/relaton_spec.rb`; unit tests mirror `lib/` structure under `spec/relaton/`
66
+ - **ISO lookups are stubbed, not cassette-recorded.** Flavor gems (relaton-iso/iec/nist)
67
+ fetch a large live `index-v2` and deserialize every id through a pinned pubid build,
68
+ so a single drifted id in the live index makes the whole index unparseable and ISO
69
+ lookups return `nil`. Umbrella specs therefore stub `Relaton::Iso::Bibliography.get`
70
+ (and other flavors' `.get`) to return hand-built `ItemData` — the umbrella's job is to
71
+ test `Db` orchestration (`combine_doc`, caching, api fallback), not relaton-iso's index.
72
+ Build stub items with the `docidentifier:` key (not `docid:`) so the id survives the
73
+ cache XML round-trip. Don't reintroduce a live-index cassette for these.
66
74
 
67
75
  ## Style
68
76
 
data/Gemfile CHANGED
@@ -3,6 +3,14 @@ source "https://rubygems.org"
3
3
  # Specify your gem's dependencies in gemspec
4
4
  gemspec
5
5
 
6
+ # Use local monorepo sibling gems where available.
7
+ Dir["../*/"].each do |dir|
8
+ name = File.basename(dir)
9
+ next if name == File.basename(__dir__)
10
+ next unless File.exist?(File.join(dir, "#{name}.gemspec"))
11
+ gem name, path: dir
12
+ end
13
+
6
14
 
7
15
 
8
16
  gem "byebug"
@@ -0,0 +1,232 @@
1
+ require "fileutils"
2
+ require "timeout"
3
+
4
module Relaton
  class Db
    # File-system cache that stores one file per entry below {#dir}.
    #
    # The file extension tells what an entry holds: the extension passed to
    # the constructor ("xml" by default) for a regular value, "notfound" for
    # a value starting with "not_found", and "redirect" for a value starting
    # with "redirection" (a pointer to another key).
    #
    # NOTE(review): this class uses `Date` and `Nokogiri` without requiring
    # them here -- presumably they are loaded elsewhere; confirm.
    class Cache
      # Value marking a failed lookup. Anchored to the start of the string so
      # that a multi-line document is never mistaken for a marker.
      NOT_FOUND = /\Anot_found/
      # Value pointing to another key; `code` is the rest of the first line.
      REDIRECTION = /\Aredirection\s(?<code>.*)/
      # First ISO-formatted date (YYYY-MM-DD) inside a value.
      ISO_DATE = /\d{4}-\d{2}-\d{2}/
      private_constant :NOT_FOUND, :REDIRECTION, :ISO_DATE

      # @return [String]
      attr_reader :dir

      # @param dir [String] DB directory, created when missing
      # @param ext [String] file extension used for regular entries
      def initialize(dir, ext = "xml")
        @dir = dir
        @ext = ext
        FileUtils.mkdir_p dir
      end

      # Move the cache to another directory. Nothing happens unless `new_dir`
      # is given and the cache extension is "xml", or when `new_dir` already
      # exists.
      # @param new_dir [String, nil]
      # @return [String, nil] the new directory, or nil when nothing was moved
      def mv(new_dir)
        return unless new_dir && @ext == "xml"

        if File.exist? new_dir
          Util.info "target directory exists `#{new_dir}`"
          return
        end

        FileUtils.mv dir, new_dir
        @dir = new_dir
      end

      # Clear database: remove everything found directly under {#dir}.
      def clear
        FileUtils.rm_rf Dir.glob "#{dir}/*"
      end

      # Save item. A nil value deletes the entry instead.
      # @param key [String]
      # @param value [String, nil] serialized item, or a "not_found ..." /
      #   "redirection ..." marker
      def []=(key, value)
        if value.nil?
          delete key
          return
        end

        prefix_dir = "#{@dir}/#{prefix(key)}"
        FileUtils.mkdir_p prefix_dir
        set_version prefix_dir
        file_safe_write "#{filename(key)}.#{ext(value)}", value
      end

      # File extension to use for a value.
      # @param value [String]
      # @return [String] "notfound", "redirect" or the cache extension
      def ext(value)
        case value
        when NOT_FOUND then "notfound"
        when /\Aredirection/ then "redirect"
        else @ext
        end
      end

      # Read item, following redirections to the entry they point to.
      # @param key [String]
      # @return [String, nil]
      def [](key)
        value = get(key)
        if (code = redirect_code value)
          self[code]
        else
          value
        end
      end

      #
      # Save entry from cache of `db` to this cache. When the stored value is
      # a redirection, the entry it points to is cloned as well.
      #
      # @param [String] key key of the entry
      # @param [Relaton::Db] db database
      #
      def clone_entry(key, db)
        self[key] ||= db.get(key)
        if (code = redirect_code get(key))
          clone_entry code, db
        end
      end

      # Return fetched date.
      # @param key [String]
      # @return [String, nil] the date found in a "not_found" marker (empty
      #   string when the marker has none), the text of the `fetched` element
      #   of an XML entry, or nil when there is no entry or no such element
      def fetched(key)
        value = self[key]
        return unless value

        if value.match?(NOT_FOUND)
          value[ISO_DATE].to_s
        else
          doc = Nokogiri::XML value
          doc.at("/bibitem/fetched|bibdata/fetched")&.text
        end
      end

      # Returns all items stored with an xml, yml or yaml extension.
      # @yieldparam file [String] path of the entry
      # @yieldparam content [String] content of the entry
      # @return [Array] file contents, or the block results when a block is given
      def all(&block)
        Dir.glob("#{@dir}/**/*.{xml,yml,yaml}").map do |f|
          content = File.read(f, encoding: "utf-8")
          block ? yield(f, content) : content
        end
      end

      # Delete item. Deleting a redirection also deletes the entry it points to.
      # @param key [String]
      def delete(key)
        file = filename key
        f = search_ext file
        return unless f

        if File.extname(f) == ".redirect"
          code = redirect_code get(key)
          delete code if code
        end
        File.delete f
      end

      # Check if version of the DB matches the gem grammar hash.
      # @param fdir [String] dir path to flavor cache
      # @return [Boolean]
      def check_version?(fdir)
        version_dir = "#{fdir}/version"
        return false unless File.exist? version_dir

        v = File.read version_dir, encoding: "utf-8"
        v.strip == self.class.grammar_hash(fdir)
      end

      # If cached reference is undated, expire it after 60 days.
      # An entry without a usable fetched date is never valid.
      # @param key [String]
      # @param year [String, nil]
      # @return [Boolean, String] truthy when the entry may still be used
      def valid_entry?(key, year)
        datestr = fetched key
        # `fetched` returns "" for a "not_found" marker without a date.
        return false if datestr.nil? || datestr.empty?

        date = Date.parse datestr
        year || Date.today - date < 60
      rescue ArgumentError
        # Date.parse could not make sense of the stored date.
        false
      end

      # Reads file by a key, without following redirections.
      #
      # @param key [String]
      # @return [String, NilClass]
      def get(key)
        file = filename key
        return unless (f = search_ext(file))

        File.read(f, encoding: "utf-8")
      end

      # @param fdir [String] dir path to flavor cache
      # @return [String, nil] grammar hash of the processor whose prefix
      #   matches the last path segment, or nil when there is none
      def self.grammar_hash(fdir)
        type = fdir.split("/").last
        Registry.instance.by_type(type)&.grammar_hash
      end

      private

      # Path (without extension) of the file that stores `key`.
      # A key of the form "PREFIX(CODE)" maps to "<dir>/<prefix>/<code>", with
      # colons, whitespace, slashes and parentheses in the code replaced by
      # single underscores; any other key has dashes, colons and whitespace
      # replaced by underscores.
      # @param key [String]
      # @return [String]
      def filename(key)
        prefcode = key.downcase.match(/^(?<prefix>[^(]+)\((?<code>[^)]+)/)
        fn = if prefcode
               code = prefcode[:code].gsub(/[:\s\/()]/, "_").squeeze("_")
               "#{prefcode[:prefix]}/#{code}"
             else
               key.gsub(/[-:\s]/, "_")
             end
        "#{@dir}/#{fn.sub(/(,|_$)/, '')}"
      end

      #
      # Checks if there is a file with the cache, "notfound" or "redirect"
      # extension (in that order) and returns the filename with the extension.
      #
      # @param file [String]
      # @return [String, NilClass]
      def search_ext(file)
        [@ext, "notfound", "redirect"]
          .map { |extension| "#{file}.#{extension}" }
          .find { |path| File.exist? path }
      end

      # Set version of the DB to the gem grammar hash, unless already recorded.
      # @param fdir [String] dir path to flavor cache
      def set_version(fdir)
        file_version = "#{fdir}/version"
        return if File.exist? file_version

        file_safe_write file_version, self.class.grammar_hash(fdir)
      end

      # Return the lowercased part of the key that precedes "(".
      # @param key [String]
      # @return [String] empty when the key has no parenthesis
      def prefix(key)
        key.downcase.match(/^[^(]+(?=\()/).to_s
      end

      # Check if a file content is redirection.
      #
      # @param value [String, nil] file content
      # @return [String, NilClass] redirection code or nil
      def redirect_code(value)
        found = REDIRECTION.match(value)
        found && found[:code]
      end

      # Replace the content of `file` while holding an exclusive lock.
      # @param file [String]
      # @param content [String, nil]
      def file_safe_write(file, content)
        File.open file, File::RDWR | File::CREAT, encoding: "UTF-8" do |f|
          Timeout.timeout(10) { f.flock File::LOCK_EX }
          # Truncate only once the lock is held; without this a shorter value
          # would leave the tail of the previous content in the file.
          f.truncate 0
          f.write content
          # Push buffered data out before the lock is released.
          f.flush
          f.flock File::LOCK_UN
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Relaton
  class Db
    # Settings consulted by Relaton::Db.
    class Configuration
      # Whether the API is used; false by default.
      attr_accessor :use_api
      # Host of the API; "https://api.relaton.org" by default.
      attr_accessor :api_host

      def initialize
        @api_host = "https://api.relaton.org"
        @use_api = false
      end
    end

    # Class-level configuration helpers; Relaton::Db is extended with them.
    module Config
      # Yield the configuration object when a block is given.
      # @yieldparam config [Configuration]
      # @return [Object, nil] the block result, or nil without a block
      def configure
        return unless block_given?

        yield configuration
      end

      # @return [Configuration] the memoized configuration object
      def configuration
        @configuration ||= Configuration.new
      end
    end

    extend Config
  end
end
@@ -0,0 +1,119 @@
1
+ require "singleton"
2
+
3
module Relaton
  class Db
    # Singleton registry of flavor processors, keyed by each processor's
    # `short` name. The supported backends are loaded when the registry is
    # first instantiated.
    class Registry
      include Singleton

      SUPPORTED_GEMS = %w[
        relaton/gb relaton/iec relaton/ietf relaton/iso
        relaton/itu relaton/nist relaton/ogc relaton/calconnect
        relaton/omg relaton/un relaton/w3c relaton/ieee
        relaton/iho relaton/bipm relaton/ecma relaton/cie
        relaton/bsi relaton/cen relaton/iana relaton/3gpp
        relaton/oasis relaton/doi relaton/jis relaton/xsf
        relaton/ccsds relaton/etsi relaton/isbn relaton/plateau
      ].freeze

      # @return [Hash] registered processors keyed by their `short` name
      attr_reader :processors

      def initialize
        @processors = {}
        register_gems
      end

      # Load every supported backend and register its processor. A backend
      # that cannot be loaded is reported through Util.error and skipped.
      def register_gems
        SUPPORTED_GEMS.each { |gem_path| load_backend gem_path }
      end

      # Register an instance of the given processor class unless one with the
      # same `short` name is already registered.
      #
      # @param processor [Class] subclass of Relaton::Core::Processor
      # @raise [Error] if `processor` is not a subclass of Core::Processor
      def register(processor)
        raise Error unless processor < Core::Processor

        candidate = processor.new
        return if processors[candidate.short]

        Util.debug("processor \"#{candidate.short}\" registered")
        processors[candidate.short] = candidate
      end

      # @param short [String, Symbol] processor short name
      # @return [Relaton::Core::Processor, nil]
      def find_processor(short)
        processors[short.to_sym]
      end

      # @return [Array<Symbol>]
      def supported_processors
        processors.keys
      end

      #
      # Search a processor by dataset name
      #
      # @param [String] dataset
      #
      # @return [Relaton::Core::Processor, nil]
      #
      def find_processor_by_dataset(dataset)
        processors.each_value.find do |candidate|
          candidate.datasets&.include? dataset
        end
      end

      #
      # Find processor by type (compared upcased against processor prefixes)
      #
      # @param type [String, nil]
      # @return [Relaton::Core::Processor, nil]
      def by_type(type)
        processors.each_value.find do |candidate|
          candidate.prefix == type&.upcase
        end
      end

      # @param stdclass [Symbol] key the processor was registered under
      # @return [Relaton::Core::Processor, nil]
      def [](stdclass)
        processors[stdclass]
      end

      #
      # Find processor by reference or prefix
      #
      # @param [String] ref reference or prefix
      #
      # @return [Relaton::Core::Processor, nil] processor
      #
      def processor_by_ref(ref)
        processors[class_by_ref(ref)]
      end

      #
      # Find standard class name by reference or prefix. A reference of the
      # form "word(inner)" is matched by its inner part.
      #
      # @param ref [String] reference or prefix
      #
      # @return [Symbol, nil] standard class name
      #
      def class_by_ref(ref)
        ref = Regexp.last_match(1) if ref =~ /^\w+\((.*)\)$/
        hit = @processors.find do |_class_name, processor|
          /^(urn:)?#{processor.prefix}\b/i.match?(ref) ||
            processor.defaultprefix.match(ref)
        end
        return hit.first if hit

        Util.info "`#{ref}` does not have a recognised prefix", key: ref
        nil
      end

      private

      # Require one backend and register its Processor class.
      # @param gem_path [String] require path, e.g. "relaton/iso"
      def load_backend(gem_path)
        # Require the gem's top-level file so its internal modules
        # (e.g. Relaton::Iso::Util) load in the right order before we
        # poke at its Processor class.
        require gem_path
        require "#{gem_path}/processor"
        register Kernel.const_get "#{gem_to_module_path(gem_path)}::Processor"
      rescue LoadError => e
        Util.error "backend #{gem_path} not present\n" \
                   "#{e.message}\n#{e.backtrace.join "\n"}"
      end

      # Turn a require path into a module path, e.g. "relaton/iso" becomes
      # "Relaton::Iso" and "relaton/3gpp" becomes "Relaton::ThreeGpp".
      # @param gem_name [String]
      # @return [String]
      def gem_to_module_path(gem_name)
        segments = gem_name.split("/")
        segments.map { |part| part.capitalize.sub("3gpp", "ThreeGpp") }.join("::")
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Relaton
  class Db
    # Utility namespace of Relaton::Db. Its module-level methods come from
    # Relaton::Bib::Util, which this module is extended with.
    module Util
      extend Relaton::Bib::Util

      # Program name of this component.
      # NOTE(review): presumably read by the Relaton::Bib::Util helpers to
      # label their output -- confirm against that module.
      PROGNAME = "relaton-db".freeze
    end
  end
end
@@ -0,0 +1,5 @@
1
module Relaton
  class Db
    # Version string exposed as Relaton::Db::VERSION.
    VERSION = "2.2.0.pre.alpha.1".freeze
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Relaton
  class Db
    # Workers pool: threads that pop items from a bounded queue and hand each
    # one to the block given to the constructor.
    class WorkersPool
      # @param workers [Integer] number of threads to start; the queue holds
      #   at most twice as many pending items
      # @yieldparam item [Object] an item pushed with {#<<}
      def initialize(workers = 2, &)
        @queue = SizedQueue.new(workers * 2)
        @threads = Array.new(workers) do
          Thread.new do
            # A nil or false item ends this worker's loop.
            while (job = @queue.pop)
              yield job
            end
          end
        end
      end

      # Enqueue an item for the workers.
      # @param item [Object]
      # @return [WorkersPool] self, so pushes can be chained
      def <<(item)
        @queue.push item
        self
      end
    end
  end
end