wikidata-diff-analyzer 0.1.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/CONTRIBUTING.md +13 -0
- data/Gemfile +0 -2
- data/README.md +268 -105
- data/lib/wikidata/diff/alias_analyzer.rb +69 -52
- data/lib/wikidata/diff/analyzer/version.rb +1 -1
- data/lib/wikidata/diff/analyzer.rb +33 -18
- data/lib/wikidata/diff/api.rb +31 -18
- data/lib/wikidata/diff/claim_analyzer.rb +94 -211
- data/lib/wikidata/diff/comment_analyzer.rb +49 -0
- data/lib/wikidata/diff/description_analyzer.rb +57 -35
- data/lib/wikidata/diff/form_analyzer.rb +67 -0
- data/lib/wikidata/diff/gloss_analyzer.rb +71 -0
- data/lib/wikidata/diff/inside_claim_analyzer.rb +84 -0
- data/lib/wikidata/diff/label_analyzer.rb +63 -41
- data/lib/wikidata/diff/large_batches_analyzer.rb +39 -24
- data/lib/wikidata/diff/lemma_analyzer.rb +70 -0
- data/lib/wikidata/diff/qualifier_analyzer.rb +83 -0
- data/lib/wikidata/diff/reference_analyzer.rb +49 -0
- data/lib/wikidata/diff/representation_analyzer.rb +71 -0
- data/lib/wikidata/diff/revision_analyzer.rb +153 -37
- data/lib/wikidata/diff/sense_analyzer.rb +106 -0
- data/lib/wikidata/diff/sitelink_analyzer.rb +3 -7
- data/lib/wikidata/diff/total.rb +31 -0
- metadata +12 -3
- data/lib/wikidata/diff/mediawiki_login.rb +0 -12
@@ -2,7 +2,6 @@
|
|
2
2
|
require_relative 'large_batches_analyzer'
|
3
3
|
require_relative 'revision_analyzer'
|
4
4
|
require_relative 'total'
|
5
|
-
require_relative 'mediawiki_login'
|
6
5
|
|
7
6
|
module WikidataDiffAnalyzer
|
8
7
|
class Error < StandardError; end
|
@@ -34,7 +33,37 @@ module WikidataDiffAnalyzer
|
|
34
33
|
descriptions_changed: 0,
|
35
34
|
sitelinks_added: 0,
|
36
35
|
sitelinks_removed: 0,
|
37
|
-
sitelinks_changed: 0
|
36
|
+
sitelinks_changed: 0,
|
37
|
+
lemmas_added: 0,
|
38
|
+
lemmas_removed: 0,
|
39
|
+
lemmas_changed: 0,
|
40
|
+
forms_added: 0,
|
41
|
+
forms_removed: 0,
|
42
|
+
forms_changed: 0,
|
43
|
+
representations_added: 0,
|
44
|
+
representations_removed: 0,
|
45
|
+
representations_changed: 0,
|
46
|
+
formclaims_added: 0,
|
47
|
+
formclaims_removed: 0,
|
48
|
+
formclaims_changed: 0,
|
49
|
+
senses_added: 0,
|
50
|
+
senses_removed: 0,
|
51
|
+
senses_changed: 0,
|
52
|
+
glosses_added: 0,
|
53
|
+
glosses_removed: 0,
|
54
|
+
glosses_changed: 0,
|
55
|
+
senseclaims_added: 0,
|
56
|
+
senseclaims_removed: 0,
|
57
|
+
senseclaims_changed: 0,
|
58
|
+
merge_to: 0,
|
59
|
+
merge_from: 0,
|
60
|
+
redirect: 0,
|
61
|
+
undo: 0,
|
62
|
+
restore: 0,
|
63
|
+
clear_item: 0,
|
64
|
+
create_item: 0,
|
65
|
+
create_property: 0,
|
66
|
+
create_lexeme: 0
|
38
67
|
}
|
39
68
|
|
40
69
|
# if revision_ids has 0, then 0 can never be analyzed, so remove it and add in not analyzed
|
@@ -42,24 +71,13 @@ module WikidataDiffAnalyzer
|
|
42
71
|
revision_ids.delete(0)
|
43
72
|
diffs_not_analyzed << 0
|
44
73
|
end
|
45
|
-
# # if mediawiki can be logged in call for 500, otherwise call for 50
|
46
|
-
# if MediawikiLogin.mediawiki_login
|
47
|
-
# puts 'Logged in to mediawiki'
|
48
|
-
# result = LargeBatchesAnalyzer.handle_large_batches(revision_ids, 500)
|
49
|
-
# else
|
50
|
-
# puts 'Not logged in to mediawiki'
|
51
|
-
# result = LargeBatchesAnalyzer.handle_large_batches(revision_ids, 50)
|
52
|
-
# end
|
53
74
|
|
54
75
|
result = LargeBatchesAnalyzer.handle_large_batches(revision_ids, 50)
|
55
|
-
# result is a hash which has contents like this:
|
56
|
-
# result[revid] = { current_content: data[:content], parent_content: parent_content }
|
57
76
|
|
58
77
|
result.each do |revision_id, revision_data|
|
59
78
|
current_content = revision_data[:current_content]
|
60
|
-
|
61
|
-
|
62
|
-
diff = RevisionAnalyzer.analyze_diff(current_content, parent_content)
|
79
|
+
if current_content
|
80
|
+
diff = RevisionAnalyzer.analyze_diff(revision_data)
|
63
81
|
diffs[revision_id] = diff
|
64
82
|
Total.accumulate_totals(diff, total)
|
65
83
|
diffs_analyzed << revision_id
|
@@ -78,6 +96,3 @@ module WikidataDiffAnalyzer
|
|
78
96
|
}
|
79
97
|
end
|
80
98
|
end
|
81
|
-
|
82
|
-
|
83
|
-
|
data/lib/wikidata/diff/api.rb
CHANGED
@@ -17,12 +17,11 @@ class Api
|
|
17
17
|
prop: 'revisions',
|
18
18
|
revids: revision_ids.join('|'),
|
19
19
|
rvslots: 'main',
|
20
|
-
rvprop: 'content|ids',
|
20
|
+
rvprop: 'content|ids|comment',
|
21
21
|
format: 'json'
|
22
22
|
)
|
23
23
|
|
24
24
|
if response.nil?
|
25
|
-
puts "No response received for revision IDs: #{revision_ids.join(', ')}"
|
26
25
|
return {}
|
27
26
|
end
|
28
27
|
|
@@ -30,31 +29,45 @@ class Api
|
|
30
29
|
|
31
30
|
# checks if it has pages
|
32
31
|
if response.data['pages'].nil?
|
33
|
-
puts "No pages found in the response for revision IDs: #{revision_ids.join(', ')}"
|
34
32
|
return nil
|
35
33
|
end
|
36
34
|
|
37
35
|
response.data['pages'].keys.each do |page|
|
38
36
|
page = response.data['pages'][page]
|
39
37
|
revisions = page['revisions']
|
40
|
-
|
38
|
+
|
41
39
|
revisions.each do |revision|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
if
|
50
|
-
|
40
|
+
content_model = revision['slots']['main']['contentmodel']
|
41
|
+
if content_model == 'wikibase-item' || content_model == 'wikibase-property' || content_model == 'wikibase-lexeme'
|
42
|
+
if revision.key?('texthidden')
|
43
|
+
puts "Content has been hidden or deleted"
|
44
|
+
revid = revision['revid']
|
45
|
+
parentid = revision['parentid']
|
46
|
+
parsed_contents[revid] = { content: nil, comment: nil, parentid: parentid, model: content_model }
|
47
|
+
# checking if comment has been deleted
|
48
|
+
elsif revision.key?('commenthidden')
|
49
|
+
puts "Comment has been hidden or deleted"
|
50
|
+
revid = revision['revid']
|
51
|
+
content = revision['slots']['main']['*']
|
52
|
+
parentid = revision['parentid']
|
53
|
+
parsed_contents[revid] = { content: JSON.parse(content), comment: nil, parentid: parentid, model: content_model }
|
51
54
|
else
|
52
|
-
|
53
|
-
|
55
|
+
content = revision['slots']['main']['*']
|
56
|
+
revid = revision['revid']
|
57
|
+
comment = revision['comment']
|
58
|
+
parentid = revision['parentid']
|
59
|
+
if revid == 0 || revid.nil?
|
60
|
+
parsed_contents[revid] = { content: nil, comment: nil, parentid: nil, model: 'wikibase-item' }
|
61
|
+
else
|
62
|
+
parsed_contents[revid] = { content: JSON.parse(content), comment: comment, parentid: parentid, model: content_model}
|
63
|
+
end
|
54
64
|
end
|
65
|
+
else
|
66
|
+
puts "Content model is #{content_model}"
|
67
|
+
puts "Revision id is #{revision['revid']}"
|
68
|
+
end
|
55
69
|
end
|
56
|
-
|
57
|
-
end
|
70
|
+
end
|
58
71
|
return parsed_contents
|
59
72
|
rescue MediawikiApi::ApiError => e
|
60
73
|
puts "Error retrieving revision content: #{e.message}"
|
@@ -64,4 +77,4 @@ class Api
|
|
64
77
|
raise e
|
65
78
|
end
|
66
79
|
end
|
67
|
-
end
|
80
|
+
end
|
@@ -1,5 +1,8 @@
|
|
1
|
+
require_relative 'reference_analyzer'
|
2
|
+
require_relative 'qualifier_analyzer'
|
3
|
+
|
1
4
|
class ClaimAnalyzer
|
2
|
-
def self.
|
5
|
+
def self.isolate_claims_differences(current_content, parent_content)
|
3
6
|
# Initialize empty arrays to store the added, removed, and changed claims
|
4
7
|
added_claims = []
|
5
8
|
removed_claims = []
|
@@ -11,234 +14,114 @@ class ClaimAnalyzer
|
|
11
14
|
removed_qualifiers = []
|
12
15
|
changed_qualifiers = []
|
13
16
|
|
14
|
-
if
|
15
|
-
|
16
|
-
added_claims: added_claims,
|
17
|
-
removed_claims: removed_claims,
|
18
|
-
changed_claims: changed_claims,
|
19
|
-
added_references: added_references,
|
20
|
-
removed_references: removed_references,
|
21
|
-
changed_references: changed_references,
|
22
|
-
added_qualifiers: added_qualifiers,
|
23
|
-
removed_qualifiers: removed_qualifiers,
|
24
|
-
changed_qualifiers: changed_qualifiers
|
25
|
-
}
|
26
|
-
end
|
27
|
-
# Iterate over each claim key in the current content
|
28
|
-
current_content["claims"].each do |claim_key, current_claims|
|
29
|
-
# Check if the claim key exists in the parent content
|
30
|
-
if parent_content["claims"].key?(claim_key)
|
31
|
-
parent_claims = parent_content["claims"][claim_key]
|
32
|
-
# Iterate over each claim in the current and parent content
|
33
|
-
current_claims.each_with_index do |current_claim, index|
|
34
|
-
parent_claim = parent_claims[index]
|
35
|
-
if parent_claim.nil?
|
36
|
-
# Claim was added
|
37
|
-
added_claims << { key: claim_key, index: index }
|
38
|
-
# check if there's any references or qualifiers in this claim
|
39
|
-
added_references = reference_updates(current_claim, added_references, claim_key, index)
|
40
|
-
added_qualifiers = qualifier_updates(current_claim, added_qualifiers, claim_key, index)
|
41
|
-
|
42
|
-
elsif current_claim != parent_claim
|
43
|
-
# Claim was changed
|
44
|
-
changed_claims << { key: claim_key, index: index }
|
45
|
-
# check if there's any references or qualifiers in this claim
|
46
|
-
changed = handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, index)
|
47
|
-
added_references = changed[:added_references]
|
48
|
-
removed_references = changed[:removed_references]
|
49
|
-
changed_references = changed[:changed_references]
|
50
|
-
changed_qualifiers = handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, index)
|
51
|
-
added_qualifiers = changed_qualifiers[:added_qualifiers]
|
52
|
-
removed_qualifiers = changed_qualifiers[:removed_qualifiers]
|
53
|
-
changed_qualifiers = changed_qualifiers[:changed_qualifiers]
|
54
|
-
end
|
55
|
-
end
|
56
|
-
# Check for removed claims
|
57
|
-
parent_claims.each_with_index do |parent_claim, index|
|
58
|
-
current_claim = current_claims[index]
|
59
|
-
if current_claim.nil?
|
60
|
-
# Claim was removed
|
61
|
-
removed_claims << { key: claim_key, index: index }
|
62
|
-
|
63
|
-
# check if there's any references or qualifiers in this claim
|
64
|
-
removed_references = reference_updates(parent_claim, removed_references, claim_key, index)
|
65
|
-
removed_qualifiers = qualifier_updates(parent_claim, removed_qualifiers, claim_key, index)
|
66
|
-
end
|
67
|
-
end
|
17
|
+
if current_content.nil?
|
18
|
+
current_content_claims = {}
|
68
19
|
else
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
# check if there's any references or qualifiers in this claim
|
73
|
-
added_references = reference_updates(current_claims[index], added_references, claim_key, index)
|
74
|
-
added_qualifiers = qualifier_updates(current_claims[index], added_qualifiers, claim_key, index)
|
20
|
+
current_content_claims = current_content["claims"]
|
21
|
+
if !current_content_claims.is_a?(Hash)
|
22
|
+
current_content_claims = {}
|
75
23
|
end
|
76
24
|
end
|
77
|
-
|
25
|
+
|
78
26
|
|
79
|
-
parent_content
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
removed_references = reference_updates(parent_claims[index], removed_references, claim_key, index)
|
86
|
-
removed_qualifiers = qualifier_updates(parent_claims[index], removed_qualifiers, claim_key, index)
|
27
|
+
if parent_content.nil?
|
28
|
+
parent_content_claims = {}
|
29
|
+
else
|
30
|
+
parent_content_claims = parent_content["claims"]
|
31
|
+
if !parent_content_claims.is_a?(Hash)
|
32
|
+
parent_content_claims = {}
|
87
33
|
end
|
88
34
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
# helper method for adding and removing references
|
116
|
-
def self.reference_updates(claim, updated_references, claim_key, claim_index)
|
117
|
-
if claim["references"]
|
118
|
-
claim["references"].each_with_index do |current_ref, ref_index|
|
119
|
-
updated_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
120
|
-
end
|
121
|
-
end
|
122
|
-
updated_references
|
123
|
-
end
|
124
|
-
|
125
|
-
# helper method for changed references
|
126
|
-
def self.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, claim_index)
|
127
|
-
current_references = current_claim["references"] ? current_claim["references"] : []
|
128
|
-
parent_references = parent_claim["references"] ? parent_claim["references"] : []
|
129
|
-
|
130
|
-
current_references.each_with_index do |current_ref, ref_index|
|
131
|
-
if parent_references.empty?
|
132
|
-
added_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
133
|
-
elsif !parent_references.include?(current_ref)
|
134
|
-
added_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
135
|
-
elsif ref_modified?(current_ref, parent_references)
|
136
|
-
changed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
parent_references.each_with_index do |parent_ref, ref_index|
|
141
|
-
if !current_references.include?(parent_ref)
|
142
|
-
removed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
143
|
-
end
|
144
|
-
end
|
35
|
+
|
36
|
+
# if parentid is 0, add all current claims as added claims and return it
|
37
|
+
if parent_content.nil?
|
38
|
+
current_content_claims.each do |claim_key, current_claims|
|
39
|
+
current_claims.each_with_index do |current_claim, index|
|
40
|
+
added_claims << { key: claim_key, index: index }
|
41
|
+
# check if there's any references or qualifiers in this claim
|
42
|
+
added_references = ReferenceAnalyzer.reference_updates(current_claim, added_references, claim_key, index)
|
43
|
+
added_qualifiers = QualifierAnalyzer.qualifier_updates(current_claim, added_qualifiers, claim_key, index)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
else
|
47
|
+
# Iterate over each claim key in the current content
|
48
|
+
current_content_claims.each do |claim_key, current_claims|
|
49
|
+
# Check if the claim key exists in the parent content
|
50
|
+
if parent_content_claims.key?(claim_key)
|
51
|
+
parent_claims = parent_content_claims[claim_key]
|
52
|
+
# Iterate over each claim in the current and parent content
|
53
|
+
current_claims.each_with_index do |current_claim, index|
|
54
|
+
parent_claim = parent_claims[index]
|
55
|
+
if parent_claim.nil?
|
56
|
+
# Claim was added
|
57
|
+
added_claims << { key: claim_key, index: index }
|
58
|
+
# check if there's any references or qualifiers in this claim
|
59
|
+
added_references = ReferenceAnalyzer.reference_updates(current_claim, added_references, claim_key, index)
|
60
|
+
added_qualifiers = QualifierAnalyzer.qualifier_updates(current_claim, added_qualifiers, claim_key, index)
|
145
61
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
end
|
62
|
+
elsif current_claim != parent_claim
|
63
|
+
# Claim was changed
|
64
|
+
changed_claims << { key: claim_key, index: index }
|
65
|
+
# check if there's any references or qualifiers in this claim
|
66
|
+
changed_references_hash = ReferenceAnalyzer.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, index)
|
152
67
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
if current_reference["snaks"] != parent_reference["snaks"]
|
157
|
-
return true
|
158
|
-
end
|
159
|
-
end
|
160
|
-
false
|
161
|
-
end
|
68
|
+
added_references = changed_references_hash[:added_references]
|
69
|
+
removed_references = changed_references_hash[:removed_references]
|
70
|
+
changed_references = changed_references_hash[:changed_references]
|
162
71
|
|
163
|
-
|
164
|
-
# handles added and removed qualifiers
|
165
|
-
def self.qualifier_updates(claim, updated_qualifiers, claim_key, claim_index)
|
166
|
-
if claim["qualifiers"]
|
167
|
-
qualifiers = claim["qualifiers"]
|
168
|
-
qualifiers.each do |qualifier_key, qualifier_values|
|
169
|
-
qualifier_values.each_with_index do |qualifier_value, qualifier_index|
|
170
|
-
updated_qualifiers << {
|
171
|
-
claim_key: claim_key,
|
172
|
-
claim_index: claim_index,
|
173
|
-
qualifier_key: qualifier_key,
|
174
|
-
qualifier_index: qualifier_index
|
175
|
-
}
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
updated_qualifiers
|
180
|
-
end
|
72
|
+
changed_qualifiers_hash = QualifierAnalyzer.handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, index)
|
181
73
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
74
|
+
added_qualifiers = changed_qualifiers_hash[:added_qualifiers]
|
75
|
+
removed_qualifiers = changed_qualifiers_hash[:removed_qualifiers]
|
76
|
+
changed_qualifiers = changed_qualifiers_hash[:changed_qualifiers]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
# Check for removed claims
|
80
|
+
parent_claims.each_with_index do |parent_claim, index|
|
81
|
+
current_claim = current_claims[index]
|
82
|
+
if current_claim.nil?
|
83
|
+
# Claim was removed
|
84
|
+
removed_claims << { key: claim_key, index: index }
|
186
85
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
# Check if the qualifier index exists in the parent content
|
193
|
-
if !parent.nil?
|
194
|
-
parent = parent[qualifier_index]
|
195
|
-
# check if the parent claim was changed by comparing the objects first
|
196
|
-
if parent != qualifier_value
|
197
|
-
# Claim was changed
|
198
|
-
changed_qualifiers << {
|
199
|
-
claim_key: claim_key,
|
200
|
-
claim_index: claim_index,
|
201
|
-
qualifier_key: qualifier_key,
|
202
|
-
qualifier_index: qualifier_index
|
203
|
-
}
|
204
|
-
end
|
86
|
+
# check if there's any references or qualifiers in this claim
|
87
|
+
removed_references = ReferenceAnalyzer.reference_updates(parent_claim, removed_references, claim_key, index)
|
88
|
+
removed_qualifiers = QualifierAnalyzer.qualifier_updates(parent_claim, removed_qualifiers, claim_key, index)
|
89
|
+
end
|
90
|
+
end
|
205
91
|
else
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
# Check for removed claims
|
217
|
-
parent_qualifiers.each do |qualifier_key, qualifier_values|
|
218
|
-
qualifier_values.each_with_index do |qualifier_value, qualifier_index|
|
219
|
-
if current_qualifiers.key?(qualifier_key)
|
220
|
-
current = current_qualifiers[qualifier_key]
|
92
|
+
# All claims in current content with this key were added
|
93
|
+
current_claims.each_index do |index|
|
94
|
+
added_claims << { key: claim_key, index: index }
|
95
|
+
# check if there's any references or qualifiers in this claim
|
96
|
+
added_references = ReferenceAnalyzer.reference_updates(current_claims[index], added_references, claim_key, index)
|
97
|
+
added_qualifiers = QualifierAnalyzer.qualifier_updates(current_claims[index], added_qualifiers, claim_key, index)
|
98
|
+
end
|
221
99
|
end
|
222
|
-
# Check if the qualifier index exists in the current content
|
223
|
-
if !current.nil?
|
224
|
-
current = current[qualifier_index]
|
225
100
|
end
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
101
|
+
|
102
|
+
parent_content_claims.each do |claim_key, parent_claims|
|
103
|
+
# current content[claims] can be nil
|
104
|
+
parent_claims.each_index do |index|
|
105
|
+
if current_content_claims.nil? || !current_content_claims.key?(claim_key)
|
106
|
+
removed_claims << { key: claim_key, index: index }
|
107
|
+
# check if there's any references or qualifiers in this claim
|
108
|
+
removed_references = ReferenceAnalyzer.reference_updates(parent_claims[index], removed_references, claim_key, index)
|
109
|
+
removed_qualifiers = QualifierAnalyzer.qualifier_updates(parent_claims[index], removed_qualifiers, claim_key, index)
|
110
|
+
end
|
111
|
+
end
|
234
112
|
end
|
235
113
|
end
|
236
|
-
end
|
237
114
|
|
238
115
|
{
|
239
|
-
|
240
|
-
|
241
|
-
|
116
|
+
added_claims: added_claims,
|
117
|
+
removed_claims: removed_claims,
|
118
|
+
changed_claims: changed_claims,
|
119
|
+
added_references: added_references,
|
120
|
+
removed_references: removed_references,
|
121
|
+
changed_references: changed_references,
|
122
|
+
added_qualifiers: added_qualifiers,
|
123
|
+
removed_qualifiers: removed_qualifiers,
|
124
|
+
changed_qualifiers: changed_qualifiers
|
242
125
|
}
|
243
126
|
end
|
244
127
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class CommentAnalyzer
|
2
|
+
def self.isolate_comment_differences(comment)
|
3
|
+
phrases = {
|
4
|
+
'merge_to': 0,
|
5
|
+
'merge_from': 0,
|
6
|
+
'redirect': 0,
|
7
|
+
'undo': 0,
|
8
|
+
'restore': 0,
|
9
|
+
'clear_item': 0,
|
10
|
+
'create_item': 0,
|
11
|
+
}
|
12
|
+
|
13
|
+
if comment.nil?
|
14
|
+
return phrases
|
15
|
+
end
|
16
|
+
|
17
|
+
if comment.include?('wbmergeitems-from')
|
18
|
+
phrases[:merge_from] = 1
|
19
|
+
end
|
20
|
+
|
21
|
+
if comment.include?('wbmergeitems-to')
|
22
|
+
phrases[:merge_to] = 1
|
23
|
+
end
|
24
|
+
|
25
|
+
if comment.include?('wbcreateredirect')
|
26
|
+
phrases[:redirect] = 1
|
27
|
+
end
|
28
|
+
|
29
|
+
if comment.include?('undo:')
|
30
|
+
phrases[:undo] = 1
|
31
|
+
end
|
32
|
+
|
33
|
+
if comment.include?('restore:')
|
34
|
+
phrases[:restore] = 1
|
35
|
+
end
|
36
|
+
|
37
|
+
if comment.include?('wbeditentity-override')
|
38
|
+
phrases[:clear_item] = 1
|
39
|
+
end
|
40
|
+
|
41
|
+
# create-property, create-item, create-lexeme all includes this phrase
|
42
|
+
# so based on content model in revision analyzer, it is decided which one it is
|
43
|
+
if comment.include?('wbeditentity-create')
|
44
|
+
phrases[:create_item] = 1
|
45
|
+
end
|
46
|
+
|
47
|
+
return phrases
|
48
|
+
end
|
49
|
+
end
|
@@ -1,49 +1,71 @@
|
|
1
|
-
class DescriptionAnalyzer
|
1
|
+
class DescriptionAnalyzer
|
2
2
|
def self.isolate_descriptions_differences(current_content, parent_content)
|
3
3
|
return {
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
changed_descriptions: [],
|
5
|
+
removed_descriptions: [],
|
6
|
+
added_descriptions: []
|
7
7
|
} if current_content.nil? && parent_content.nil?
|
8
|
-
|
9
|
-
current_descriptions = current_content['descriptions'] || {}
|
10
|
-
parent_descriptions = parent_content['descriptions'] || {}
|
11
8
|
|
9
|
+
if current_content
|
10
|
+
current_descriptions = current_content['descriptions']
|
11
|
+
if current_descriptions.nil? || current_descriptions.is_a?(Array)
|
12
|
+
current_descriptions = {}
|
13
|
+
end
|
14
|
+
else
|
15
|
+
current_descriptions = {}
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
if parent_content
|
20
|
+
parent_descriptions = parent_content['descriptions']
|
21
|
+
if parent_descriptions.nil? || parent_descriptions.is_a?(Array)
|
22
|
+
parent_descriptions = {}
|
23
|
+
end
|
24
|
+
else
|
25
|
+
parent_descriptions = {}
|
26
|
+
end
|
12
27
|
|
13
28
|
changed_descriptions = [] # Initialize as an array
|
14
29
|
removed_descriptions = [] # Initialize as an array
|
15
30
|
added_descriptions = [] # Initialize as an array
|
16
|
-
|
17
|
-
if !current_descriptions.is_a?(Hash) || !parent_descriptions.is_a?(Hash)
|
18
|
-
return{
|
19
|
-
changed: changed_descriptions,
|
20
|
-
removed: removed_descriptions,
|
21
|
-
added: added_descriptions
|
22
|
-
}
|
23
|
-
end
|
24
31
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
32
|
+
|
33
|
+
# if parentid is 0, add all current description as added and return it
|
34
|
+
if parent_content.nil?
|
35
|
+
if !current_descriptions.empty?
|
36
|
+
current_descriptions.each do |lang, description|
|
37
|
+
added_descriptions << { lang: lang }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
return {
|
41
|
+
changed_descriptions: changed_descriptions,
|
42
|
+
removed_descriptions: removed_descriptions,
|
43
|
+
added_descriptions: added_descriptions
|
44
|
+
}
|
45
|
+
else
|
46
|
+
# Iterate over each language in the current descriptions
|
47
|
+
(current_descriptions).each do |lang, current_description|
|
48
|
+
# checking if the parent descriptions is empty
|
49
|
+
if parent_descriptions.empty?
|
50
|
+
added_descriptions << { lang: lang }
|
51
|
+
elsif parent_descriptions[lang].nil?
|
52
|
+
added_descriptions << { lang: lang }
|
53
|
+
elsif current_description != parent_descriptions[lang]
|
54
|
+
changed_descriptions << { lang: lang }
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
# Iterate over each language in the parent descriptions to find removed descriptions
|
59
|
+
(parent_descriptions).each do |lang, parent_description|
|
60
|
+
if current_descriptions.empty?
|
61
|
+
removed_descriptions << { lang: lang }
|
62
|
+
end
|
63
|
+
end
|
41
64
|
end
|
42
|
-
|
43
65
|
{
|
44
|
-
|
45
|
-
|
46
|
-
|
66
|
+
changed_descriptions: changed_descriptions,
|
67
|
+
removed_descriptions: removed_descriptions,
|
68
|
+
added_descriptions: added_descriptions
|
47
69
|
}
|
48
70
|
end
|
49
71
|
end
|