digital_scriptorium 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/digital_scriptorium/version.rb +1 -1
- metadata +2 -45
- data/wikibase_to_solr_new.rb +0 -116
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62b823f25e2940c6a68ee4ea8db949cebbdc95f9d31682d72a60dbbb62c0a0ff
|
4
|
+
data.tar.gz: 53c69f7e20af8b7efc327214c4bfbb8378f6d80133106458231277961f4ed613
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 20ae390598e32c3276426c98dd437cd9d097e485d50bcb86757058defbbce42408a3a00b8c20fec9f86f2010078483bec4e1cd3053fc39ad42a462cf283e30e4
|
7
|
+
data.tar.gz: e200d3a053fba432f9819c68ceb3c48db5ad7d6daafc648f39ef5039b0fba345275c04f00215aff8c369fae2cf897e3eef31152bf93d375fdfa560b55b86564c
|
metadata
CHANGED
@@ -1,56 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digital_scriptorium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Holloway
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-01-
|
10
|
+
date: 2025-01-18 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: multi_json
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - "~>"
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: '1.15'
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - "~>"
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: '1.15'
|
26
|
-
- !ruby/object:Gem::Dependency
|
27
|
-
name: representable
|
28
|
-
requirement: !ruby/object:Gem::Requirement
|
29
|
-
requirements:
|
30
|
-
- - "~>"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '3.2'
|
33
|
-
type: :runtime
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: !ruby/object:Gem::Requirement
|
36
|
-
requirements:
|
37
|
-
- - "~>"
|
38
|
-
- !ruby/object:Gem::Version
|
39
|
-
version: '3.2'
|
40
|
-
- !ruby/object:Gem::Dependency
|
41
|
-
name: tty-spinner
|
42
|
-
requirement: !ruby/object:Gem::Requirement
|
43
|
-
requirements:
|
44
|
-
- - "~>"
|
45
|
-
- !ruby/object:Gem::Version
|
46
|
-
version: '0.9'
|
47
|
-
type: :runtime
|
48
|
-
prerelease: false
|
49
|
-
version_requirements: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
51
|
-
- - "~>"
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '0.9'
|
54
12
|
- !ruby/object:Gem::Dependency
|
55
13
|
name: wikibase_representable
|
56
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -161,7 +119,6 @@ files:
|
|
161
119
|
- lib/digital_scriptorium/transformers/uniform_title_claim_transformer.rb
|
162
120
|
- lib/digital_scriptorium/version.rb
|
163
121
|
- sig/digital_scriptorium.rbs
|
164
|
-
- wikibase_to_solr_new.rb
|
165
122
|
homepage: https://github.com/mdholloway/digital_scriptorium
|
166
123
|
licenses:
|
167
124
|
- MIT
|
data/wikibase_to_solr_new.rb
DELETED
@@ -1,116 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'digital_scriptorium'
|
4
|
-
require 'json'
|
5
|
-
require 'logging'
|
6
|
-
require 'optparse'
|
7
|
-
require 'set'
|
8
|
-
require 'time'
|
9
|
-
require 'tty-spinner'
|
10
|
-
require 'zlib'
|
11
|
-
|
12
|
-
dir = File.dirname __FILE__
|
13
|
-
|
14
|
-
input_file = File.expand_path 'wikibase_export.json.gz', dir
|
15
|
-
output_file = File.expand_path 'solr_import.json', dir
|
16
|
-
config_file = File.expand_path 'property_config.yml', dir
|
17
|
-
pretty_print = false
|
18
|
-
|
19
|
-
logger = Logging.logger($stdout)
|
20
|
-
|
21
|
-
OptionParser.new { |opts|
|
22
|
-
opts.banner = 'Usage: wikibase_to_solr.rb [options]'
|
23
|
-
|
24
|
-
opts.on('-i', '--in FILE', 'The file path to the gzipped Wikibase JSON export file.') do |f|
|
25
|
-
input_file = File.expand_path f, dir
|
26
|
-
end
|
27
|
-
|
28
|
-
opts.on('-o', '--out FILE', 'The file path to output the formatted Solr JSON file.') do |f|
|
29
|
-
output_file = File.expand_path f, dir
|
30
|
-
end
|
31
|
-
|
32
|
-
opts.on('-c', '--config FILE', 'The file path to the property configuration file.') do |f|
|
33
|
-
config_file = File.expand_path f, dir
|
34
|
-
end
|
35
|
-
|
36
|
-
opts.on('-p', '--pretty-print', 'Whether to pretty-print the JSON output.') do
|
37
|
-
pretty_print = true
|
38
|
-
end
|
39
|
-
}.parse!
|
40
|
-
|
41
|
-
def merge(solr_item, new_props)
|
42
|
-
solr_item.merge(new_props) do |_, old_val, new_val|
|
43
|
-
old_val.nil? ? new_val : (old_val + new_val).uniq
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def base_solr_item(meta)
|
48
|
-
ds_id = meta.manuscript.ds_id
|
49
|
-
{
|
50
|
-
'qid_meta' => [meta.holding.id, meta.manuscript.id, meta.record.id],
|
51
|
-
'id' => [ds_id],
|
52
|
-
'id_display' => [JSON.generate(recorded_value: ds_id)],
|
53
|
-
'id_search' => [ds_id]
|
54
|
-
}
|
55
|
-
end
|
56
|
-
|
57
|
-
def record?(entity)
|
58
|
-
entity.is_a?(DigitalScriptorium::DsItem) &&
|
59
|
-
entity.claims_by_property_id?(DigitalScriptorium::PropertyId::INSTANCE_OF) &&
|
60
|
-
entity.record?
|
61
|
-
end
|
62
|
-
|
63
|
-
start_time = Time.now.utc
|
64
|
-
|
65
|
-
loading_spinner = TTY::Spinner.new('[:spinner] Loading export data', hide_cursor: true)
|
66
|
-
loading_spinner.auto_spin
|
67
|
-
|
68
|
-
export_json = Zlib::GzipReader.open(input_file).read
|
69
|
-
export_hash = DigitalScriptorium::ExportRepresenter.new(DigitalScriptorium::Export.new)
|
70
|
-
.from_json(export_json)
|
71
|
-
.to_hash
|
72
|
-
loaded_time = Time.now.utc
|
73
|
-
loading_spinner.success("(#{format('%0.02f', loaded_time - start_time)}s)")
|
74
|
-
|
75
|
-
item_count = 0
|
76
|
-
generating_spinner = TTY::Spinner.new('[:spinner] Generating Solr documents', hide_cursor: true)
|
77
|
-
generating_spinner.auto_spin
|
78
|
-
|
79
|
-
File.open(output_file, 'w') do |file|
|
80
|
-
file << '['
|
81
|
-
file << "\n" if pretty_print
|
82
|
-
|
83
|
-
export_hash.each_with_index do |(_, entity), idx|
|
84
|
-
next unless record?(entity)
|
85
|
-
|
86
|
-
meta = DigitalScriptorium::DsMeta.new(entity, export_hash)
|
87
|
-
solr_item = base_solr_item(meta)
|
88
|
-
|
89
|
-
[meta.holding, meta.manuscript, meta.record].each do |item|
|
90
|
-
item.claims.each do |property_id, claims|
|
91
|
-
claims.each do |claim|
|
92
|
-
next unless DigitalScriptorium::Transformers.defined? property_id
|
93
|
-
|
94
|
-
begin
|
95
|
-
transformer = DigitalScriptorium::Transformers.create property_id, claim, export_hash
|
96
|
-
solr_item = merge solr_item, transformer.solr_props
|
97
|
-
rescue StandardError => e
|
98
|
-
logger.error "Error processing #{property_id} claim for item #{item.id}: #{e}"
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
file << (pretty_print ? JSON.pretty_generate(solr_item) : JSON.generate(solr_item))
|
105
|
-
file << ',' if idx < export_hash.size - 1
|
106
|
-
file << "\n" if pretty_print
|
107
|
-
|
108
|
-
item_count += 1
|
109
|
-
end
|
110
|
-
|
111
|
-
file << ']'
|
112
|
-
end
|
113
|
-
|
114
|
-
finish_time = Time.now.utc
|
115
|
-
generating_spinner.success("(#{format('%0.02f', finish_time - loaded_time)}s)")
|
116
|
-
puts "Generated #{item_count} Solr documents in #{format('%0.02f', finish_time - start_time)} seconds"
|