wikidata-diff-analyzer 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +91 -0
- data/LICENSE +21 -0
- data/README.md +178 -0
- data/Rakefile +12 -0
- data/lib/wikidata/diff/alias_analyzer.rb +71 -0
- data/lib/wikidata/diff/analyzer/version.rb +9 -0
- data/lib/wikidata/diff/analyzer.rb +99 -0
- data/lib/wikidata/diff/api.rb +67 -0
- data/lib/wikidata/diff/claim_analyzer.rb +244 -0
- data/lib/wikidata/diff/description_analyzer.rb +49 -0
- data/lib/wikidata/diff/label_analyzer.rb +53 -0
- data/lib/wikidata/diff/large_batches_analyzer.rb +39 -0
- data/lib/wikidata/diff/mediawiki_login.rb +12 -0
- data/lib/wikidata/diff/revision_analyzer.rb +50 -0
- data/lib/wikidata/diff/sitelink_analyzer.rb +61 -0
- data/lib/wikidata/diff/total.rb +25 -0
- data/sig/wikidata/diff/analyzer.rbs +8 -0
- data/wikidata-diff-analyzer.gemspec +36 -0
- metadata +83 -0
@@ -0,0 +1,244 @@
|
|
1
|
+
# Computes the differences between the "claims" sections of two revisions of
# an entity, including the references and qualifiers attached to each claim.
#
# Claims (per property) and qualifier values (per qualifier property) are
# paired up by their position in the list, so inserting or deleting an element
# in the middle of a list shows up as changes to the elements that follow it.
class ClaimAnalyzer
  # Compares the claims of a revision with those of its parent.
  #
  # A missing content (nil) or a "claims" value that is not a Hash is treated
  # as "no claims", so every claim on the other side is reported as added or
  # removed instead of being silently dropped.
  # NOTE(review): non-Hash values are presumably empty JSON arrays returned
  # for entities without claims - confirm against the API output.
  #
  # @param current_content [Hash, nil] parsed entity JSON of the revision
  # @param parent_content [Hash, nil] parsed entity JSON of the parent revision
  # @return [Hash{Symbol => Array<Hash>}] nine lists keyed :added_claims,
  #   :removed_claims, :changed_claims, :added_references, :removed_references,
  #   :changed_references, :added_qualifiers, :removed_qualifiers and
  #   :changed_qualifiers. Claim entries look like { key:, index: }; reference
  #   and qualifier entries carry the locating keys built by the helpers below.
  def self.isolate_claim_differences(current_content, parent_content)
    diff = {
      added_claims: [],
      removed_claims: [],
      changed_claims: [],
      added_references: [],
      removed_references: [],
      changed_references: [],
      added_qualifiers: [],
      removed_qualifiers: [],
      changed_qualifiers: []
    }

    current_by_property = claims_by_property(current_content)
    parent_by_property = claims_by_property(parent_content)

    # Added and changed claims: walk the current revision.
    current_by_property.each do |claim_key, current_claims|
      parent_claims = parent_by_property[claim_key] || []

      current_claims.each_with_index do |current_claim, index|
        parent_claim = parent_claims[index]

        if parent_claim.nil?
          # No counterpart in the parent: the claim and everything attached
          # to it is new.
          diff[:added_claims] << { key: claim_key, index: index }
          reference_updates(current_claim, diff[:added_references], claim_key, index)
          qualifier_updates(current_claim, diff[:added_qualifiers], claim_key, index)
        elsif current_claim != parent_claim
          diff[:changed_claims] << { key: claim_key, index: index }
          handle_changed_references(current_claim, parent_claim, diff[:changed_references],
                                    diff[:added_references], diff[:removed_references],
                                    claim_key, index)
          handle_changed_qualifiers(current_claim, parent_claim, diff[:changed_qualifiers],
                                    diff[:added_qualifiers], diff[:removed_qualifiers],
                                    claim_key, index)
        end
      end
    end

    # Removed claims: walk the parent revision. This covers both properties
    # that disappeared entirely and properties whose claim list got shorter.
    parent_by_property.each do |claim_key, parent_claims|
      current_claims = current_by_property[claim_key] || []

      parent_claims.each_with_index do |parent_claim, index|
        next unless current_claims[index].nil?

        diff[:removed_claims] << { key: claim_key, index: index }
        reference_updates(parent_claim, diff[:removed_references], claim_key, index)
        qualifier_updates(parent_claim, diff[:removed_qualifiers], claim_key, index)
      end
    end

    diff
  end

  # Records every reference of +claim+ in +updated_references+. Used when a
  # whole claim was added or removed, so all of its references share that fate.
  #
  # @param claim [Hash] a single claim
  # @param updated_references [Array<Hash>] list to append to (mutated)
  # @param claim_key [String] property key the claim is stored under
  # @param claim_index [Integer] position of the claim in its property list
  # @return [Array<Hash>] +updated_references+
  def self.reference_updates(claim, updated_references, claim_key, claim_index)
    (claim["references"] || []).each_index do |ref_index|
      updated_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end
    updated_references
  end

  # Classifies the references of a changed claim as added, removed or changed.
  #
  # References that appear unmodified on both sides are ignored. The remaining
  # current references are paired, in order, with the remaining parent
  # references: each pair counts as one changed reference (reported at the
  # current reference's index). Surplus current references are added, surplus
  # parent references are removed.
  #
  # @param current_claim [Hash] claim in the current revision
  # @param parent_claim [Hash] claim at the same position in the parent
  # @param changed_references [Array<Hash>] list to append to (mutated)
  # @param added_references [Array<Hash>] list to append to (mutated)
  # @param removed_references [Array<Hash>] list to append to (mutated)
  # @param claim_key [String] property key the claim is stored under
  # @param claim_index [Integer] position of the claim in its property list
  # @return [Hash] the three lists under :added_references,
  #   :removed_references and :changed_references
  def self.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, claim_index)
    current_references = current_claim["references"] || []
    parent_references = parent_claim["references"] || []

    unmatched_current = current_references.each_index.reject do |ref_index|
      parent_references.include?(current_references[ref_index])
    end
    unmatched_parent = parent_references.each_index.reject do |ref_index|
      current_references.include?(parent_references[ref_index])
    end
    modified_count = [unmatched_current.length, unmatched_parent.length].min

    unmatched_current.each_with_index do |ref_index, position|
      target = position < modified_count ? changed_references : added_references
      target << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end

    # The first +modified_count+ unmatched parent references were consumed by
    # the "changed" pairs above; only the rest are real removals.
    unmatched_parent.drop(modified_count).each do |ref_index|
      removed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end

    {
      added_references: added_references,
      removed_references: removed_references,
      changed_references: changed_references
    }
  end

  # Tells whether any of +parent_references+ has snaks that differ from those
  # of +current_reference+. Kept for existing callers; it is no longer used by
  # handle_changed_references because it also fires for an untouched reference
  # that merely has a different sibling.
  #
  # @param current_reference [Hash] a reference with a "snaks" entry
  # @param parent_references [Array<Hash>] references to compare against
  # @return [Boolean]
  def self.ref_modified?(current_reference, parent_references)
    parent_references.any? do |parent_reference|
      current_reference["snaks"] != parent_reference["snaks"]
    end
  end

  # Records every qualifier value of +claim+ in +updated_qualifiers+. Used when
  # a whole claim was added or removed.
  #
  # @param claim [Hash] a single claim
  # @param updated_qualifiers [Array<Hash>] list to append to (mutated)
  # @param claim_key [String] property key the claim is stored under
  # @param claim_index [Integer] position of the claim in its property list
  # @return [Array<Hash>] +updated_qualifiers+
  def self.qualifier_updates(claim, updated_qualifiers, claim_key, claim_index)
    (claim["qualifiers"] || {}).each do |qualifier_key, qualifier_values|
      qualifier_values.each_index do |qualifier_index|
        updated_qualifiers << {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
      end
    end
    updated_qualifiers
  end

  # Classifies the qualifier values of a changed claim as added, removed or
  # changed by comparing values at the same position under the same qualifier
  # property.
  #
  # @param current_claim [Hash] claim in the current revision
  # @param parent_claim [Hash] claim at the same position in the parent
  # @param changed_qualifiers [Array<Hash>] list to append to (mutated)
  # @param added_qualifiers [Array<Hash>] list to append to (mutated)
  # @param removed_qualifiers [Array<Hash>] list to append to (mutated)
  # @param claim_key [String] property key the claim is stored under
  # @param claim_index [Integer] position of the claim in its property list
  # @return [Hash] the three lists under :added_qualifiers,
  #   :removed_qualifiers and :changed_qualifiers
  def self.handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, claim_index)
    current_qualifiers = current_claim["qualifiers"] || {}
    parent_qualifiers = parent_claim["qualifiers"] || {}

    current_qualifiers.each do |qualifier_key, qualifier_values|
      parent_values = parent_qualifiers[qualifier_key] || []

      qualifier_values.each_with_index do |qualifier_value, qualifier_index|
        parent_value = parent_values[qualifier_index]
        entry = {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }

        if parent_value.nil?
          # Either the property is new or the value was appended to an
          # existing property: both are additions, not changes.
          added_qualifiers << entry
        elsif parent_value != qualifier_value
          changed_qualifiers << entry
        end
      end
    end

    parent_qualifiers.each do |qualifier_key, qualifier_values|
      current_values = current_qualifiers[qualifier_key] || []

      qualifier_values.each_index do |qualifier_index|
        next unless current_values[qualifier_index].nil?

        removed_qualifiers << {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
      end
    end

    {
      added_qualifiers: added_qualifiers,
      removed_qualifiers: removed_qualifiers,
      changed_qualifiers: changed_qualifiers
    }
  end

  # Returns the property => claims Hash of +content+, or an empty Hash when
  # the content is missing or its "claims" value is not a Hash.
  def self.claims_by_property(content)
    claims = content.is_a?(Hash) ? content["claims"] : nil
    claims.is_a?(Hash) ? claims : {}
  end
  private_class_method :claims_by_property
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Computes which descriptions (keyed by language) were added, removed or
# changed between a revision and its parent.
class DescriptionAnalyzer
  # Compares the 'descriptions' sections of two revisions.
  #
  # A missing content (nil) or a 'descriptions' value that is not a Hash is
  # treated as "no descriptions", so the entries of the other side are
  # reported as added or removed rather than raising or being dropped.
  #
  # @param current_content [Hash, nil] parsed entity JSON of the revision
  # @param parent_content [Hash, nil] parsed entity JSON of the parent revision
  # @return [Hash{Symbol => Array<Hash>}] :changed, :removed and :added lists
  #   of { lang: <language key> } entries
  def self.isolate_descriptions_differences(current_content, parent_content)
    current_descriptions = descriptions_of(current_content)
    parent_descriptions = descriptions_of(parent_content)

    changed_descriptions = []
    added_descriptions = []

    current_descriptions.each do |lang, current_description|
      parent_description = parent_descriptions[lang]

      if parent_description.nil?
        added_descriptions << { lang: lang }
      elsif current_description != parent_description
        changed_descriptions << { lang: lang }
      end
    end

    # Languages that exist in the parent but no longer in the current revision.
    removed_descriptions = parent_descriptions.keys
                                              .select { |lang| current_descriptions[lang].nil? }
                                              .map { |lang| { lang: lang } }

    {
      changed: changed_descriptions,
      removed: removed_descriptions,
      added: added_descriptions
    }
  end

  # Returns the language => description Hash of +content+, or an empty Hash
  # when the content is missing or its 'descriptions' value is not a Hash.
  def self.descriptions_of(content)
    descriptions = content.is_a?(Hash) ? content['descriptions'] : nil
    descriptions.is_a?(Hash) ? descriptions : {}
  end
  private_class_method :descriptions_of
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Computes which labels (keyed by language) were added, removed or changed
# between a revision and its parent.
class LabelAnalyzer
  # Compares the 'labels' sections of two revisions.
  #
  # A missing content (nil) or a 'labels' value that is not a Hash (for
  # example an Array) is treated as "no labels", so the entries of the other
  # side are reported as added or removed rather than raising or being
  # dropped.
  #
  # @param current_content [Hash, nil] parsed entity JSON of the revision
  # @param parent_content [Hash, nil] parsed entity JSON of the parent revision
  # @return [Hash{Symbol => Array<Hash>}] :changed, :removed and :added lists
  #   of { lang: <language key> } entries
  def self.isolate_labels_differences(current_content, parent_content)
    current_labels = labels_of(current_content)
    parent_labels = labels_of(parent_content)

    changed_labels = []
    added_labels = []

    current_labels.each do |lang, current_label|
      parent_label = parent_labels[lang]

      if parent_label.nil?
        added_labels << { lang: lang }
      elsif current_label != parent_label
        changed_labels << { lang: lang }
      end
    end

    # Languages that exist in the parent but no longer in the current revision.
    removed_labels = parent_labels.keys
                                  .select { |lang| current_labels[lang].nil? }
                                  .map { |lang| { lang: lang } }

    {
      changed: changed_labels,
      removed: removed_labels,
      added: added_labels
    }
  end

  # Returns the language => label Hash of +content+, or an empty Hash when the
  # content is missing or its 'labels' value is not a Hash.
  def self.labels_of(content)
    labels = content.is_a?(Hash) ? content['labels'] : nil
    labels.is_a?(Hash) ? labels : {}
  end
  private_class_method :labels_of
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require_relative 'api'
|
2
|
+
|
3
|
+
# Fetches revision contents in batches and pairs every revision with the
# content of its parent revision.
class LargeBatchesAnalyzer
  # Retrieves the content of every revision in +revision_ids+ together with
  # the content of its parent, requesting +batch_size+ revisions at a time
  # through Api.get_revision_contents.
  #
  # Api.get_revision_contents is expected to return a Hash mapping a revision
  # id to a Hash with :content and :parentid entries (or a falsy value when
  # the request yields nothing), as that is how its result is consumed here.
  #
  # @param revision_ids [Array] ids of the revisions to load
  # @param batch_size [Integer] maximum number of ids per API request
  # @return [Hash] revision id => { current_content:, parent_content: };
  #   parent_content is nil when the revision has no parent (parentid 0 or
  #   nil) or the parent could not be loaded
  def self.handle_large_batches(revision_ids, batch_size)
    revision_contents = {}
    parent_contents = {}

    revision_ids_batches = revision_ids.each_slice(batch_size).to_a
    puts "Handling revision_ids_batches: #{revision_ids_batches.length}"

    revision_ids_batches.each do |batch|
      parsed_contents = Api.get_revision_contents(batch)
      next unless parsed_contents

      revision_contents.merge!(parsed_contents)

      parent_ids = parsed_contents.values
                                  .map { |data| data && data[:parentid] }
                                  .reject { |parent_id| parent_id.nil? || parent_id == 0 }
                                  .uniq
      # Nothing to look up when no revision of the batch has a parent; avoid
      # sending a request with an empty id list.
      next if parent_ids.empty?

      parent_contents_batch = Api.get_revision_contents(parent_ids)
      parent_contents.merge!(parent_contents_batch) if parent_contents_batch
    end

    revision_contents.each_with_object({}) do |(revid, data), result|
      parentid = data ? data[:parentid] : nil
      parent_data = parentid ? parent_contents[parentid] : nil

      result[revid] = {
        current_content: data ? data[:content] : nil,
        parent_content: parent_data ? parent_data[:content] : nil
      }
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'mediawiki_api'
|
2
|
+
# to load env variable
|
3
|
+
require 'dotenv/load'
|
4
|
+
|
5
|
+
# THIS IS NOT WORKING YET
|
6
|
+
# Thin wrapper around a MediawikiApi client login.
class MediawikiLogin
  # Logs in to the MediaWiki API endpoint using the credentials held in the
  # MEDIAWIKI_USERNAME and MEDIAWIKI_PASSWORD environment variables.
  #
  # @return the client's logged_in? value after the login attempt
  def self.mediawiki_login
    username = ENV['MEDIAWIKI_USERNAME']
    password = ENV['MEDIAWIKI_PASSWORD']

    api_client = MediawikiApi::Client.new('https://www.mediawiki.org/w/api.php')
    api_client.log_in(username, password)
    api_client.logged_in?
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require_relative 'claim_analyzer'
|
2
|
+
require_relative 'alias_analyzer'
|
3
|
+
require_relative 'label_analyzer'
|
4
|
+
require_relative 'description_analyzer'
|
5
|
+
require_relative 'sitelink_analyzer'
|
6
|
+
|
7
|
+
# Aggregates the per-section analyzers into a single Hash of counts.
class RevisionAnalyzer
  # Runs every section analyzer on the two revisions and returns how many
  # entries each of them reported.
  #
  # @param current_content parsed entity content of the revision
  # @param parent_content parsed entity content of the parent revision
  # @return [Hash{Symbol => Integer}] counts keyed :added_<section>,
  #   :removed_<section> and :changed_<section> for claims, references,
  #   qualifiers, aliases, labels, descriptions and sitelinks
  def self.analyze_diff(current_content, parent_content)
    counts = {}

    # The claim analyzer reports claims, references and qualifiers in one go,
    # already keyed the way the result is keyed.
    claim_diff = ClaimAnalyzer.isolate_claim_differences(current_content, parent_content)
    %i[claims references qualifiers].each do |section|
      %i[added removed changed].each do |action|
        key = :"#{action}_#{section}"
        counts[key] = claim_diff[key].length
      end
    end

    count_section(counts, :aliases,
                  AliasAnalyzer.isolate_aliases_differences(current_content, parent_content))
    count_section(counts, :labels,
                  LabelAnalyzer.isolate_labels_differences(current_content, parent_content))
    count_section(counts, :descriptions,
                  DescriptionAnalyzer.isolate_descriptions_differences(current_content, parent_content))
    count_section(counts, :sitelinks,
                  SitelinkAnalyzer.isolate_sitelinks_differences(current_content, parent_content))

    counts
  end

  # Stores the sizes of the :added, :removed and :changed collections of
  # +section_diff+ in +counts+ under :added_<section> etc.
  def self.count_section(counts, section, section_diff)
    %i[added removed changed].each do |action|
      counts[:"#{action}_#{section}"] = section_diff[action].length
    end
  end
  private_class_method :count_section
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Computes which sitelinks (keyed by site) were added, removed or changed
# between a revision and its parent.
class SitelinkAnalyzer
  # Compares the 'sitelinks' sections of two revisions.
  #
  # A missing content (nil) or a 'sitelinks' value that is not a Hash is
  # treated as "no sitelinks". All three result values are therefore always
  # Hashes, including when only one of the two contents is present.
  #
  # @param current_content [Hash, nil] parsed entity JSON of the revision
  # @param parent_content [Hash, nil] parsed entity JSON of the parent revision
  # @return [Hash{Symbol => Hash}] :added and :removed map a site key to the
  #   sitelink; :changed maps a site key to { current:, parent: }
  def self.isolate_sitelinks_differences(current_content, parent_content)
    current_sitelinks = sitelinks_of(current_content)
    parent_sitelinks = sitelinks_of(parent_content)

    added_sitelinks = current_sitelinks.reject { |site_key, _| parent_sitelinks.key?(site_key) }
    removed_sitelinks = parent_sitelinks.reject { |site_key, _| current_sitelinks.key?(site_key) }

    changed_sitelinks = {}
    current_sitelinks.each do |site_key, current_sitelink|
      next unless parent_sitelinks.key?(site_key)

      parent_sitelink = parent_sitelinks[site_key]
      next if current_sitelink == parent_sitelink

      changed_sitelinks[site_key] = {
        current: current_sitelink,
        parent: parent_sitelink
      }
    end

    {
      added: added_sitelinks,
      removed: removed_sitelinks,
      changed: changed_sitelinks
    }
  end

  # Returns the site => sitelink Hash of +content+, or an empty Hash when the
  # content is missing or its 'sitelinks' value is not a Hash.
  def self.sitelinks_of(content)
    sitelinks = content.is_a?(Hash) ? content['sitelinks'] : nil
    sitelinks.is_a?(Hash) ? sitelinks : {}
  end
  private_class_method :sitelinks_of
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Accumulates per-revision counts into a running total.
class Total
  # Adds every count in +diff_data+ to the matching counter in +total+.
  #
  # +diff_data+ is keyed :<action>_<section> (for example :added_claims)
  # while +total+ is keyed :<section>_<action> (for example :claims_added);
  # both must already contain all 21 entries.
  #
  # @param diff_data [Hash{Symbol => Integer}] counts for one revision
  # @param total [Hash{Symbol => Integer}] running totals (mutated)
  # @return [Integer] the updated :sitelinks_changed total, which is the value
  #   of the last counter that gets updated
  def self.accumulate_totals(diff_data, total)
    %i[claims qualifiers references aliases labels descriptions sitelinks].each do |section|
      %i[added removed changed].each do |action|
        total[:"#{section}_#{action}"] += diff_data[:"#{action}_#{section}"]
      end
    end

    total[:sitelinks_changed]
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/wikidata/diff/analyzer/version"
|
4
|
+
|
5
|
+
# Gem specification for wikidata-diff-analyzer.
Gem::Specification.new do |spec|
  repository_url = "https://github.com/WikiEducationFoundation/wikidata-diff-analyzer"

  spec.name = "wikidata-diff-analyzer"
  spec.version = Wikidata::Diff::Analyzer::VERSION
  spec.authors = ["Sulagna Saha"]
  spec.email = ["saha23s@mtholyoke.edu"]

  spec.summary = "A Ruby gem for analyzing diffs between Wikidata items."
  spec.description = "This gem provides tools for analyzing diffs between Wikidata items, including retrieving the JSON representation of an item for a specific revision."
  spec.homepage = repository_url
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.6.0"

  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = repository_url
  spec.metadata["changelog_uri"] = "#{repository_url}/blob/main/CHANGELOG.md"

  # Package every file tracked by git, except this gemspec itself and
  # anything under the development-only paths listed below.
  excluded_prefixes = %w[bin/ test/ spec/ features/ .git .circleci appveyor]
  spec.files = Dir.chdir(__dir__) do
    tracked_files = `git ls-files -z`.split("\x0")
    tracked_files.reject do |path|
      (File.expand_path(path) == __FILE__) || path.start_with?(*excluded_prefixes)
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency 'json', '~> 2.1'
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wikidata-diff-analyzer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sulagna Saha
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-07-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.1'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.1'
|
27
|
+
description: This gem provides tools for analyzing diffs between Wikidata items, including
|
28
|
+
retrieving the JSON representation of an item for a specific revision.
|
29
|
+
email:
|
30
|
+
- saha23s@mtholyoke.edu
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- CHANGELOG.md
|
36
|
+
- CODE_OF_CONDUCT.md
|
37
|
+
- Gemfile
|
38
|
+
- Gemfile.lock
|
39
|
+
- LICENSE
|
40
|
+
- README.md
|
41
|
+
- Rakefile
|
42
|
+
- lib/wikidata/diff/alias_analyzer.rb
|
43
|
+
- lib/wikidata/diff/analyzer.rb
|
44
|
+
- lib/wikidata/diff/analyzer/version.rb
|
45
|
+
- lib/wikidata/diff/api.rb
|
46
|
+
- lib/wikidata/diff/claim_analyzer.rb
|
47
|
+
- lib/wikidata/diff/description_analyzer.rb
|
48
|
+
- lib/wikidata/diff/label_analyzer.rb
|
49
|
+
- lib/wikidata/diff/large_batches_analyzer.rb
|
50
|
+
- lib/wikidata/diff/mediawiki_login.rb
|
51
|
+
- lib/wikidata/diff/revision_analyzer.rb
|
52
|
+
- lib/wikidata/diff/sitelink_analyzer.rb
|
53
|
+
- lib/wikidata/diff/total.rb
|
54
|
+
- sig/wikidata/diff/analyzer.rbs
|
55
|
+
- wikidata-diff-analyzer.gemspec
|
56
|
+
homepage: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata:
|
60
|
+
allowed_push_host: https://rubygems.org
|
61
|
+
homepage_uri: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer
|
62
|
+
source_code_uri: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer
|
63
|
+
changelog_uri: https://github.com/WikiEducationFoundation/wikidata-diff-analyzer/blob/main/CHANGELOG.md
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 2.6.0
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubygems_version: 3.3.7
|
80
|
+
signing_key:
|
81
|
+
specification_version: 4
|
82
|
+
summary: A Ruby gem for analyzing diffs between Wikidata items.
|
83
|
+
test_files: []
|