digital_scriptorium 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 90d97605a47a87aec5fb8dc41385f9ce55e415d8d96bd51bc91f0436b8d5ec07
4
- data.tar.gz: 873ab9fbb3d1fb275419ec753c36929f1a806fe40a9023aaa3279b2684871218
3
+ metadata.gz: 62b823f25e2940c6a68ee4ea8db949cebbdc95f9d31682d72a60dbbb62c0a0ff
4
+ data.tar.gz: 53c69f7e20af8b7efc327214c4bfbb8378f6d80133106458231277961f4ed613
5
5
  SHA512:
6
- metadata.gz: 0142d571dd96cd21270a782c0327bf0798af63e00d904eb71263d7a43fb8eb69e4fbe2854ad75dcf4b4bed7b56148e81a4a0be1862bff0e76168572126b94532
7
- data.tar.gz: e51efd7f188fcc8bc29e846d50b6fceeb6a61ec00ccb1217807ed124303060290a6f6890980b25ab54adae11283896319af3ba0af63b6ca1f4d890a7bf2243e4
6
+ metadata.gz: 20ae390598e32c3276426c98dd437cd9d097e485d50bcb86757058defbbce42408a3a00b8c20fec9f86f2010078483bec4e1cd3053fc39ad42a462cf283e30e4
7
+ data.tar.gz: e200d3a053fba432f9819c68ceb3c48db5ad7d6daafc648f39ef5039b0fba345275c04f00215aff8c369fae2cf897e3eef31152bf93d375fdfa560b55b86564c
lib/digital_scriptorium/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DigitalScriptorium
4
- VERSION = '0.2.0'
4
+ VERSION = '0.2.1'
5
5
  end
metadata CHANGED
@@ -1,56 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digital_scriptorium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Holloway
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-01-17 00:00:00.000000000 Z
10
+ date: 2025-01-18 00:00:00.000000000 Z
11
11
  dependencies:
12
- - !ruby/object:Gem::Dependency
13
- name: multi_json
14
- requirement: !ruby/object:Gem::Requirement
15
- requirements:
16
- - - "~>"
17
- - !ruby/object:Gem::Version
18
- version: '1.15'
19
- type: :runtime
20
- prerelease: false
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - "~>"
24
- - !ruby/object:Gem::Version
25
- version: '1.15'
26
- - !ruby/object:Gem::Dependency
27
- name: representable
28
- requirement: !ruby/object:Gem::Requirement
29
- requirements:
30
- - - "~>"
31
- - !ruby/object:Gem::Version
32
- version: '3.2'
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: !ruby/object:Gem::Requirement
36
- requirements:
37
- - - "~>"
38
- - !ruby/object:Gem::Version
39
- version: '3.2'
40
- - !ruby/object:Gem::Dependency
41
- name: tty-spinner
42
- requirement: !ruby/object:Gem::Requirement
43
- requirements:
44
- - - "~>"
45
- - !ruby/object:Gem::Version
46
- version: '0.9'
47
- type: :runtime
48
- prerelease: false
49
- version_requirements: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0.9'
54
12
  - !ruby/object:Gem::Dependency
55
13
  name: wikibase_representable
56
14
  requirement: !ruby/object:Gem::Requirement
@@ -161,7 +119,6 @@ files:
161
119
  - lib/digital_scriptorium/transformers/uniform_title_claim_transformer.rb
162
120
  - lib/digital_scriptorium/version.rb
163
121
  - sig/digital_scriptorium.rbs
164
- - wikibase_to_solr_new.rb
165
122
  homepage: https://github.com/mdholloway/digital_scriptorium
166
123
  licenses:
167
124
  - MIT
wikibase_to_solr_new.rb DELETED
@@ -1,116 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'digital_scriptorium'
4
- require 'json'
5
- require 'logging'
6
- require 'optparse'
7
- require 'set'
8
- require 'time'
9
- require 'tty-spinner'
10
- require 'zlib'
11
-
12
- dir = File.dirname __FILE__
13
-
14
- input_file = File.expand_path 'wikibase_export.json.gz', dir
15
- output_file = File.expand_path 'solr_import.json', dir
16
- config_file = File.expand_path 'property_config.yml', dir
17
- pretty_print = false
18
-
19
- logger = Logging.logger($stdout)
20
-
21
- OptionParser.new { |opts|
22
- opts.banner = 'Usage: wikibase_to_solr.rb [options]'
23
-
24
- opts.on('-i', '--in FILE', 'The file path to the gzipped Wikibase JSON export file.') do |f|
25
- input_file = File.expand_path f, dir
26
- end
27
-
28
- opts.on('-o', '--out FILE', 'The file path to output the formatted Solr JSON file.') do |f|
29
- output_file = File.expand_path f, dir
30
- end
31
-
32
- opts.on('-c', '--config FILE', 'The file path to the property configuration file.') do |f|
33
- config_file = File.expand_path f, dir
34
- end
35
-
36
- opts.on('-p', '--pretty-print', 'Whether to pretty-print the JSON output.') do
37
- pretty_print = true
38
- end
39
- }.parse!
40
-
41
- def merge(solr_item, new_props)
42
- solr_item.merge(new_props) do |_, old_val, new_val|
43
- old_val.nil? ? new_val : (old_val + new_val).uniq
44
- end
45
- end
46
-
47
- def base_solr_item(meta)
48
- ds_id = meta.manuscript.ds_id
49
- {
50
- 'qid_meta' => [meta.holding.id, meta.manuscript.id, meta.record.id],
51
- 'id' => [ds_id],
52
- 'id_display' => [JSON.generate(recorded_value: ds_id)],
53
- 'id_search' => [ds_id]
54
- }
55
- end
56
-
57
- def record?(entity)
58
- entity.is_a?(DigitalScriptorium::DsItem) &&
59
- entity.claims_by_property_id?(DigitalScriptorium::PropertyId::INSTANCE_OF) &&
60
- entity.record?
61
- end
62
-
63
- start_time = Time.now.utc
64
-
65
- loading_spinner = TTY::Spinner.new('[:spinner] Loading export data', hide_cursor: true)
66
- loading_spinner.auto_spin
67
-
68
- export_json = Zlib::GzipReader.open(input_file).read
69
- export_hash = DigitalScriptorium::ExportRepresenter.new(DigitalScriptorium::Export.new)
70
- .from_json(export_json)
71
- .to_hash
72
- loaded_time = Time.now.utc
73
- loading_spinner.success("(#{format('%0.02f', loaded_time - start_time)}s)")
74
-
75
- item_count = 0
76
- generating_spinner = TTY::Spinner.new('[:spinner] Generating Solr documents', hide_cursor: true)
77
- generating_spinner.auto_spin
78
-
79
- File.open(output_file, 'w') do |file|
80
- file << '['
81
- file << "\n" if pretty_print
82
-
83
- export_hash.each_with_index do |(_, entity), idx|
84
- next unless record?(entity)
85
-
86
- meta = DigitalScriptorium::DsMeta.new(entity, export_hash)
87
- solr_item = base_solr_item(meta)
88
-
89
- [meta.holding, meta.manuscript, meta.record].each do |item|
90
- item.claims.each do |property_id, claims|
91
- claims.each do |claim|
92
- next unless DigitalScriptorium::Transformers.defined? property_id
93
-
94
- begin
95
- transformer = DigitalScriptorium::Transformers.create property_id, claim, export_hash
96
- solr_item = merge solr_item, transformer.solr_props
97
- rescue StandardError => e
98
- logger.error "Error processing #{property_id} claim for item #{item.id}: #{e}"
99
- end
100
- end
101
- end
102
- end
103
-
104
- file << (pretty_print ? JSON.pretty_generate(solr_item) : JSON.generate(solr_item))
105
- file << ',' if idx < export_hash.size - 1
106
- file << "\n" if pretty_print
107
-
108
- item_count += 1
109
- end
110
-
111
- file << ']'
112
- end
113
-
114
- finish_time = Time.now.utc
115
- generating_spinner.success("(#{format('%0.02f', finish_time - loaded_time)}s)")
116
- puts "Generated #{item_count} Solr documents in #{format('%0.02f', finish_time - start_time)} seconds"