wikidata-diff-analyzer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +91 -0
- data/LICENSE +21 -0
- data/README.md +178 -0
- data/Rakefile +12 -0
- data/lib/wikidata/diff/alias_analyzer.rb +71 -0
- data/lib/wikidata/diff/analyzer/version.rb +9 -0
- data/lib/wikidata/diff/analyzer.rb +99 -0
- data/lib/wikidata/diff/api.rb +67 -0
- data/lib/wikidata/diff/claim_analyzer.rb +244 -0
- data/lib/wikidata/diff/description_analyzer.rb +49 -0
- data/lib/wikidata/diff/label_analyzer.rb +53 -0
- data/lib/wikidata/diff/large_batches_analyzer.rb +39 -0
- data/lib/wikidata/diff/mediawiki_login.rb +12 -0
- data/lib/wikidata/diff/revision_analyzer.rb +50 -0
- data/lib/wikidata/diff/sitelink_analyzer.rb +61 -0
- data/lib/wikidata/diff/total.rb +25 -0
- data/sig/wikidata/diff/analyzer.rbs +8 -0
- data/wikidata-diff-analyzer.gemspec +36 -0
- metadata +83 -0
@@ -0,0 +1,244 @@
|
|
1
|
+
# Compares the "claims" section of two revisions of an entity and reports which
# claims, references and qualifiers were added, removed or changed.
#
# Every reported item is a small Hash of positions (claim key / indexes), not the
# claim data itself.
class ClaimAnalyzer
  # Builds the full claim diff between a revision and its parent.
  #
  # Claims are paired by property key and by position inside that key's list.
  # A missing (nil) content, or a "claims" value that is not a Hash, is treated
  # as "no claims", so everything on the other side is reported as added or
  # removed instead of raising or being silently ignored.
  #
  # @param current_content [Hash, nil] content of the revision being analyzed
  # @param parent_content [Hash, nil] content of its parent revision
  # @return [Hash{Symbol=>Array<Hash>}] keys :added_claims, :removed_claims,
  #   :changed_claims, :added_references, :removed_references,
  #   :changed_references, :added_qualifiers, :removed_qualifiers,
  #   :changed_qualifiers
  def self.isolate_claim_differences(current_content, parent_content)
    diff = {
      added_claims: [],
      removed_claims: [],
      changed_claims: [],
      added_references: [],
      removed_references: [],
      changed_references: [],
      added_qualifiers: [],
      removed_qualifiers: [],
      changed_qualifiers: []
    }

    current_by_key = claims_of(current_content)
    parent_by_key = claims_of(parent_content)

    # Keys of the current revision first, then keys that only exist in the parent.
    (current_by_key.keys | parent_by_key.keys).each do |claim_key|
      current_claims = current_by_key[claim_key] || []
      parent_claims = parent_by_key[claim_key] || []

      current_claims.each_with_index do |current_claim, index|
        parent_claim = parent_claims[index]

        if parent_claim.nil?
          # No claim at this position in the parent: the claim is new, and so is
          # everything attached to it.
          diff[:added_claims] << { key: claim_key, index: index }
          reference_updates(current_claim, diff[:added_references], claim_key, index)
          qualifier_updates(current_claim, diff[:added_qualifiers], claim_key, index)
        elsif current_claim != parent_claim
          diff[:changed_claims] << { key: claim_key, index: index }
          # The helpers append to the arrays they are given.
          handle_changed_references(current_claim, parent_claim, diff[:changed_references],
                                    diff[:added_references], diff[:removed_references],
                                    claim_key, index)
          handle_changed_qualifiers(current_claim, parent_claim, diff[:changed_qualifiers],
                                    diff[:added_qualifiers], diff[:removed_qualifiers],
                                    claim_key, index)
        end
      end

      parent_claims.each_with_index do |parent_claim, index|
        next unless current_claims[index].nil?

        # The position no longer exists in the current revision: claim removed.
        diff[:removed_claims] << { key: claim_key, index: index }
        reference_updates(parent_claim, diff[:removed_references], claim_key, index)
        qualifier_updates(parent_claim, diff[:removed_qualifiers], claim_key, index)
      end
    end

    diff
  end

  # Appends one entry per reference of +claim+ to +updated_references+.
  # Used when a whole claim was added or removed.
  #
  # @return [Array<Hash>] the same +updated_references+ array
  def self.reference_updates(claim, updated_references, claim_key, claim_index)
    (claim["references"] || []).each_index do |ref_index|
      updated_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end
    updated_references
  end

  # Classifies the references of a changed claim.
  #
  # * A reference that also appears (equal by value) anywhere in the other
  #   revision is unchanged and not reported.
  # * A new reference sitting at a position whose parent reference disappeared
  #   is reported once, as changed (index in the current revision).
  # * Any other new reference is added; any other vanished reference is removed
  #   (index in the parent revision).
  #
  # @return [Hash] :added_references, :removed_references, :changed_references
  #   (the arrays that were passed in)
  def self.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, claim_index)
    current_references = current_claim["references"] || []
    parent_references = parent_claim["references"] || []

    current_references.each_with_index do |current_ref, ref_index|
      next if parent_references.include?(current_ref)

      entry = { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
      replaced = parent_references[ref_index]
      if replaced && !current_references.include?(replaced)
        changed_references << entry
      else
        added_references << entry
      end
    end

    parent_references.each_with_index do |parent_ref, ref_index|
      next if current_references.include?(parent_ref)

      # Skip references already reported above as changed in place.
      replacement = current_references[ref_index]
      next if replacement && !parent_references.include?(replacement)

      removed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end

    {
      added_references: added_references,
      removed_references: removed_references,
      changed_references: changed_references
    }
  end

  # Returns true when at least one of +parent_references+ has "snaks" that
  # differ from those of +current_reference+.
  #
  # Kept for backward compatibility only. It is no longer used by
  # handle_changed_references, because it answers true for an untouched
  # reference as soon as the claim carries any other, different reference.
  def self.ref_modified?(current_reference, parent_references)
    parent_references.any? do |parent_reference|
      current_reference["snaks"] != parent_reference["snaks"]
    end
  end

  # Appends one entry per qualifier value of +claim+ to +updated_qualifiers+.
  # Used when a whole claim was added or removed.
  #
  # @return [Array<Hash>] the same +updated_qualifiers+ array
  def self.qualifier_updates(claim, updated_qualifiers, claim_key, claim_index)
    hash_or_empty(claim["qualifiers"]).each do |qualifier_key, qualifier_values|
      qualifier_values.each_index do |qualifier_index|
        updated_qualifiers << {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
      end
    end
    updated_qualifiers
  end

  # Classifies the qualifiers of a changed claim, pairing values by qualifier
  # key and by position inside that key's list.
  #
  # A value with no counterpart at the same position in the parent is added
  # (including a value appended to an already existing qualifier key), a value
  # that differs from its counterpart is changed, and a parent value with no
  # counterpart in the current revision is removed.
  #
  # @return [Hash] :added_qualifiers, :removed_qualifiers, :changed_qualifiers
  #   (the arrays that were passed in)
  def self.handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, claim_index)
    current_qualifiers = hash_or_empty(current_claim["qualifiers"])
    parent_qualifiers = hash_or_empty(parent_claim["qualifiers"])

    current_qualifiers.each do |qualifier_key, qualifier_values|
      parent_values = parent_qualifiers[qualifier_key] || []

      qualifier_values.each_with_index do |qualifier_value, qualifier_index|
        parent_value = parent_values[qualifier_index]
        entry = {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }

        if parent_value.nil?
          added_qualifiers << entry
        elsif parent_value != qualifier_value
          changed_qualifiers << entry
        end
      end
    end

    parent_qualifiers.each do |qualifier_key, qualifier_values|
      current_values = current_qualifiers[qualifier_key] || []

      qualifier_values.each_index do |qualifier_index|
        next unless current_values[qualifier_index].nil?

        removed_qualifiers << {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
      end
    end

    {
      added_qualifiers: added_qualifiers,
      removed_qualifiers: removed_qualifiers,
      changed_qualifiers: changed_qualifiers
    }
  end

  # Returns the "claims" Hash of +content+, or an empty Hash when the content
  # is missing or its claims are not a Hash.
  def self.claims_of(content)
    hash_or_empty(content.is_a?(Hash) ? content["claims"] : nil)
  end

  # Returns +value+ when it is a Hash, otherwise an empty Hash.
  def self.hash_or_empty(value)
    value.is_a?(Hash) ? value : {}
  end

  private_class_method :claims_of, :hash_or_empty
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Compares the "descriptions" section of two revisions of an entity.
class DescriptionAnalyzer
  # Lists the languages whose description was added, removed or changed.
  #
  # A missing (nil) content, or a 'descriptions' value that is not a Hash, is
  # treated as "no descriptions", so the other side's descriptions are reported
  # as added or removed instead of raising or being dropped.
  #
  # @param current_content [Hash, nil] content of the revision being analyzed
  # @param parent_content [Hash, nil] content of its parent revision
  # @return [Hash{Symbol=>Array<Hash>}] :changed, :removed and :added, each a
  #   list of { lang: <language key> }
  def self.isolate_descriptions_differences(current_content, parent_content)
    current_descriptions = descriptions_of(current_content)
    parent_descriptions = descriptions_of(parent_content)

    added_descriptions = []
    changed_descriptions = []
    current_descriptions.each do |lang, current_description|
      parent_description = parent_descriptions[lang]

      if parent_description.nil?
        added_descriptions << { lang: lang }
      elsif current_description != parent_description
        changed_descriptions << { lang: lang }
      end
    end

    # Languages present in the parent with no value left in the current revision.
    removed_descriptions = parent_descriptions.keys
                                              .select { |lang| current_descriptions[lang].nil? }
                                              .map { |lang| { lang: lang } }

    {
      changed: changed_descriptions,
      removed: removed_descriptions,
      added: added_descriptions
    }
  end

  # Returns the 'descriptions' Hash of +content+, or an empty Hash when the
  # content is missing or its descriptions are not a Hash.
  def self.descriptions_of(content)
    descriptions = content.is_a?(Hash) ? content['descriptions'] : nil
    descriptions.is_a?(Hash) ? descriptions : {}
  end

  private_class_method :descriptions_of
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Compares the "labels" section of two revisions of an entity.
class LabelAnalyzer
  # Lists the languages whose label was added, removed or changed.
  #
  # A missing (nil) content, or a 'labels' value that is not a Hash (for example
  # an empty Array), is treated as "no labels", so the other side's labels are
  # reported as added or removed instead of raising or being dropped.
  #
  # @param current_content [Hash, nil] content of the revision being analyzed
  # @param parent_content [Hash, nil] content of its parent revision
  # @return [Hash{Symbol=>Array<Hash>}] :changed, :removed and :added, each a
  #   list of { lang: <language key> }
  def self.isolate_labels_differences(current_content, parent_content)
    current_labels = labels_of(current_content)
    parent_labels = labels_of(parent_content)

    added_labels = []
    changed_labels = []
    current_labels.each do |lang, current_label|
      parent_label = parent_labels[lang]

      if parent_label.nil?
        added_labels << { lang: lang }
      elsif current_label != parent_label
        changed_labels << { lang: lang }
      end
    end

    # Languages present in the parent with no value left in the current revision.
    removed_labels = parent_labels.keys
                                  .select { |lang| current_labels[lang].nil? }
                                  .map { |lang| { lang: lang } }

    {
      changed: changed_labels,
      removed: removed_labels,
      added: added_labels
    }
  end

  # Returns the 'labels' Hash of +content+, or an empty Hash when the content is
  # missing or its labels are not a Hash.
  def self.labels_of(content)
    labels = content.is_a?(Hash) ? content['labels'] : nil
    labels.is_a?(Hash) ? labels : {}
  end

  private_class_method :labels_of
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require_relative 'api'
|
2
|
+
|
3
|
+
# Fetches revision contents in batches and pairs each revision with the content
# of its parent revision.
class LargeBatchesAnalyzer
  # Splits +revision_ids+ into slices of +batch_size+, asks
  # Api.get_revision_contents for each slice, then asks it again for the parent
  # ids found in that slice.
  #
  # @param revision_ids [Array] ids of the revisions to load
  # @param batch_size [Integer] maximum number of ids per Api call
  # @return [Hash] revision id => { current_content:, parent_content: }, where
  #   parent_content is nil when no parent entry was loaded for the revision
  def self.handle_large_batches(revision_ids, batch_size)
    batches = revision_ids.each_slice(batch_size).to_a
    puts "Handling revision_ids_batches: #{batches.length}"

    revisions = {}
    parents = {}

    batches.each do |batch|
      fetched = Api.get_revision_contents(batch)
      next unless fetched

      revisions.merge!(fetched)

      # A parent id of 0 or nil is not looked up.
      parent_ids = fetched.each_value
                          .map { |data| data[:parentid] }
                          .reject { |parent_id| parent_id.nil? || parent_id == 0 }

      fetched_parents = Api.get_revision_contents(parent_ids)
      parents.merge!(fetched_parents) if fetched_parents
    end

    revisions.each_with_object({}) do |(revid, data), paired|
      parent_id = data[:parentid]
      parent_entry = parent_id ? parents[parent_id] : nil

      paired[revid] = {
        current_content: data[:content],
        parent_content: parent_entry ? parent_entry[:content] : nil
      }
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'mediawiki_api'
|
2
|
+
# to load env variable
|
3
|
+
require 'dotenv/load'
|
4
|
+
|
5
|
+
# THIS IS NOT WORKING YET
|
6
|
+
# Logs in to the MediaWiki API with credentials taken from the environment.
class MediawikiLogin
  # Creates a MediawikiApi client for www.mediawiki.org, logs in with the
  # MEDIAWIKI_USERNAME / MEDIAWIKI_PASSWORD environment variables and returns
  # the client's logged_in? answer.
  def self.mediawiki_login
    api_url = 'https://www.mediawiki.org/w/api.php'
    username = ENV['MEDIAWIKI_USERNAME']
    password = ENV['MEDIAWIKI_PASSWORD']

    MediawikiApi::Client.new(api_url)
                        .tap { |client| client.log_in(username, password) }
                        .logged_in?
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require_relative 'claim_analyzer'
|
2
|
+
require_relative 'alias_analyzer'
|
3
|
+
require_relative 'label_analyzer'
|
4
|
+
require_relative 'description_analyzer'
|
5
|
+
require_relative 'sitelink_analyzer'
|
6
|
+
|
7
|
+
# Turns the detailed per-section diffs of a revision into plain counts.
class RevisionAnalyzer
  # Runs every section analyzer on the two contents and counts the entries each
  # one reports.
  #
  # @param current_content content of the revision being analyzed
  # @param parent_content content of its parent revision
  # @return [Hash{Symbol=>Integer}] counts keyed :added_<kind>, :removed_<kind>
  #   and :changed_<kind> for claims, references, qualifiers, aliases, labels,
  #   descriptions and sitelinks (in that order)
  def self.analyze_diff(current_content, parent_content)
    actions = %i[added removed changed]
    counts = {}

    # The claim analyzer reports claims, references and qualifiers together,
    # already keyed as <action>_<kind>.
    claim_diff = ClaimAnalyzer.isolate_claim_differences(current_content, parent_content)
    %i[claims references qualifiers].each do |kind|
      actions.each do |action|
        key = :"#{action}_#{kind}"
        counts[key] = claim_diff[key].length
      end
    end

    # The other analyzers report under :added / :removed / :changed.
    tally = lambda do |kind, section_diff|
      actions.each { |action| counts[:"#{action}_#{kind}"] = section_diff[action].length }
    end

    tally.call(:aliases, AliasAnalyzer.isolate_aliases_differences(current_content, parent_content))
    tally.call(:labels, LabelAnalyzer.isolate_labels_differences(current_content, parent_content))
    tally.call(:descriptions, DescriptionAnalyzer.isolate_descriptions_differences(current_content, parent_content))
    tally.call(:sitelinks, SitelinkAnalyzer.isolate_sitelinks_differences(current_content, parent_content))

    counts
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Compares the "sitelinks" section of two revisions of an entity.
class SitelinkAnalyzer
  # Reports which sitelinks were added, removed or changed, keyed by site.
  #
  # A missing (nil) content, or a 'sitelinks' value that is not a Hash, is
  # treated as "no sitelinks". All three results are therefore always Hashes,
  # so callers can safely take their length.
  #
  # @param current_content [Hash, nil] content of the revision being analyzed
  # @param parent_content [Hash, nil] content of its parent revision
  # @return [Hash] :added and :removed map site key => sitelink; :changed maps
  #   site key => { current: <sitelink>, parent: <sitelink> }
  def self.isolate_sitelinks_differences(current_content, parent_content)
    current_sitelinks = sitelinks_of(current_content)
    parent_sitelinks = sitelinks_of(parent_content)

    added_sitelinks = current_sitelinks.reject { |site_key, _| parent_sitelinks.key?(site_key) }
    removed_sitelinks = parent_sitelinks.reject { |site_key, _| current_sitelinks.key?(site_key) }

    changed_sitelinks = {}
    current_sitelinks.each do |site_key, current_sitelink|
      next unless parent_sitelinks.key?(site_key)

      parent_sitelink = parent_sitelinks[site_key]
      next if current_sitelink == parent_sitelink

      changed_sitelinks[site_key] = {
        current: current_sitelink,
        parent: parent_sitelink
      }
    end

    {
      added: added_sitelinks,
      removed: removed_sitelinks,
      changed: changed_sitelinks
    }
  end

  # Returns the 'sitelinks' Hash of +content+, or an empty Hash when the content
  # is missing or its sitelinks are not a Hash.
  def self.sitelinks_of(content)
    sitelinks = content.is_a?(Hash) ? content['sitelinks'] : nil
    sitelinks.is_a?(Hash) ? sitelinks : {}
  end

  private_class_method :sitelinks_of
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Adds the counts of one revision diff to a running total.
class Total
  # Adds every diff_data[:<action>_<category>] to total[:<category>_<action>],
  # mutating +total+ in place.
  #
  # @param diff_data [Hash{Symbol=>Integer}] counts keyed :added_claims,
  #   :removed_claims, :changed_claims, ... (one triple per category)
  # @param total [Hash{Symbol=>Integer}] running totals keyed :claims_added,
  #   :claims_removed, :claims_changed, ...
  # @return the updated value of total[:sitelinks_changed]
  def self.accumulate_totals(diff_data, total)
    categories = %i[claims qualifiers references aliases labels descriptions sitelinks]
    actions = %i[added removed changed]

    categories.each do |category|
      actions.each do |action|
        total[:"#{category}_#{action}"] += diff_data[:"#{action}_#{category}"]
      end
    end

    # Same value the final `+=` statement evaluates to.
    total[:sitelinks_changed]
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/wikidata/diff/analyzer/version"
|
4
|
+
|
5
|
+
# Gem specification for wikidata-diff-analyzer.
Gem::Specification.new do |spec|
  repository_url = "https://github.com/WikiEducationFoundation/wikidata-diff-analyzer"

  # Identity
  spec.name = "wikidata-diff-analyzer"
  spec.version = Wikidata::Diff::Analyzer::VERSION
  spec.authors = ["Sulagna Saha"]
  spec.email = ["saha23s@mtholyoke.edu"]
  spec.license = "MIT"

  # Description
  spec.summary = "A Ruby gem for analyzing diffs between Wikidata items."
  spec.description = "This gem provides tools for analyzing diffs between Wikidata items, including retrieving the JSON representation of an item for a specific revision."
  spec.homepage = repository_url
  spec.required_ruby_version = ">= 2.6.0"

  # Registry and project links
  spec.metadata["allowed_push_host"] = "https://rubygems.org"
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = repository_url
  spec.metadata["changelog_uri"] = "#{repository_url}/blob/main/CHANGELOG.md"

  # Package every file tracked by git, except this gemspec itself and anything
  # under the development-only prefixes listed below.
  excluded_prefixes = %w[bin/ test/ spec/ features/ .git .circleci appveyor]
  spec.files = Dir.chdir(__dir__) do
    tracked_files = `git ls-files -z`.split("\x0")
    tracked_files.reject do |path|
      (File.expand_path(path) == __FILE__) || path.start_with?(*excluded_prefixes)
    end
  end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency 'json', '~> 2.1'
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikidata-diff-analyzer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sulagna Saha
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-07-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.1'
|
27
|
+
description: This gem provides tools for analyzing diffs between Wikidata items, including
|
28
|
+
retrieving the JSON representation of an item for a specific revision.
|
29
|
+
email:
|
30
|
+
- saha23s@mtholyoke.edu
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- CHANGELOG.md
|
36
|
+
- CODE_OF_CONDUCT.md
|
37
|
+
- Gemfile
|
38
|
+
- Gemfile.lock
|
39
|
+
- LICENSE
|
40
|
+
- README.md
|
41
|
+
- Rakefile
|
42
|
+
- lib/wikidata/diff/alias_analyzer.rb
|
43
|
+
- lib/wikidata/diff/analyzer.rb
|
44
|
+
- lib/wikidata/diff/analyzer/version.rb
|
45
|
+
- lib/wikidata/diff/api.rb
|
46
|
+
- lib/wikidata/diff/claim_analyzer.rb
|
47
|
+
- lib/wikidata/diff/description_analyzer.rb
|
48
|
+
- lib/wikidata/diff/label_analyzer.rb
|
49
|
+
- lib/wikidata/diff/large_batches_analyzer.rb
|
50
|
+
- lib/wikidata/diff/mediawiki_login.rb
|
51
|
+
- lib/wikidata/diff/revision_analyzer.rb
|
52
|
+
- lib/wikidata/diff/sitelink_analyzer.rb
|
53
|
+
- lib/wikidata/diff/total.rb
|
54
|
+
- sig/wikidata/diff/analyzer.rbs
|
55
|
+
- wikidata-diff-analyzer.gemspec
|
56
|
+
homepage: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata:
|
60
|
+
allowed_push_host: https://rubygems.org
|
61
|
+
homepage_uri: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer
|
62
|
+
source_code_uri: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer
|
63
|
+
changelog_uri: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer/blob/main/CHANGELOG.md
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 2.6.0
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubygems_version: 3.3.7
|
80
|
+
signing_key:
|
81
|
+
specification_version: 4
|
82
|
+
summary: A Ruby gem for analyzing diffs between Wikidata items.
|
83
|
+
test_files: []
|