wikidata-diff-analyzer 0.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/CONTRIBUTING.md +13 -0
- data/Gemfile +0 -2
- data/Gemfile.lock +2 -12
- data/README.md +269 -105
- data/lib/wikidata/diff/alias_analyzer.rb +69 -52
- data/lib/wikidata/diff/analyzer/version.rb +1 -1
- data/lib/wikidata/diff/analyzer.rb +33 -34
- data/lib/wikidata/diff/api.rb +31 -18
- data/lib/wikidata/diff/claim_analyzer.rb +94 -211
- data/lib/wikidata/diff/comment_analyzer.rb +49 -0
- data/lib/wikidata/diff/description_analyzer.rb +57 -35
- data/lib/wikidata/diff/form_analyzer.rb +67 -0
- data/lib/wikidata/diff/gloss_analyzer.rb +71 -0
- data/lib/wikidata/diff/inside_claim_analyzer.rb +84 -0
- data/lib/wikidata/diff/label_analyzer.rb +63 -41
- data/lib/wikidata/diff/large_batches_analyzer.rb +39 -25
- data/lib/wikidata/diff/lemma_analyzer.rb +70 -0
- data/lib/wikidata/diff/qualifier_analyzer.rb +83 -0
- data/lib/wikidata/diff/reference_analyzer.rb +49 -0
- data/lib/wikidata/diff/representation_analyzer.rb +71 -0
- data/lib/wikidata/diff/revision_analyzer.rb +153 -37
- data/lib/wikidata/diff/sense_analyzer.rb +106 -0
- data/lib/wikidata/diff/sitelink_analyzer.rb +3 -7
- data/lib/wikidata/diff/total.rb +31 -0
- data/wikidata-diff-analyzer.gemspec +1 -0
- metadata +12 -3
- data/lib/wikidata/diff/mediawiki_login.rb +0 -12
@@ -2,7 +2,6 @@
|
|
2
2
|
require_relative 'large_batches_analyzer'
|
3
3
|
require_relative 'revision_analyzer'
|
4
4
|
require_relative 'total'
|
5
|
-
require_relative 'mediawiki_login'
|
6
5
|
|
7
6
|
module WikidataDiffAnalyzer
|
8
7
|
class Error < StandardError; end
|
@@ -34,7 +33,37 @@ module WikidataDiffAnalyzer
|
|
34
33
|
descriptions_changed: 0,
|
35
34
|
sitelinks_added: 0,
|
36
35
|
sitelinks_removed: 0,
|
37
|
-
sitelinks_changed: 0
|
36
|
+
sitelinks_changed: 0,
|
37
|
+
lemmas_added: 0,
|
38
|
+
lemmas_removed: 0,
|
39
|
+
lemmas_changed: 0,
|
40
|
+
forms_added: 0,
|
41
|
+
forms_removed: 0,
|
42
|
+
forms_changed: 0,
|
43
|
+
representations_added: 0,
|
44
|
+
representations_removed: 0,
|
45
|
+
representations_changed: 0,
|
46
|
+
formclaims_added: 0,
|
47
|
+
formclaims_removed: 0,
|
48
|
+
formclaims_changed: 0,
|
49
|
+
senses_added: 0,
|
50
|
+
senses_removed: 0,
|
51
|
+
senses_changed: 0,
|
52
|
+
glosses_added: 0,
|
53
|
+
glosses_removed: 0,
|
54
|
+
glosses_changed: 0,
|
55
|
+
senseclaims_added: 0,
|
56
|
+
senseclaims_removed: 0,
|
57
|
+
senseclaims_changed: 0,
|
58
|
+
merge_to: 0,
|
59
|
+
merge_from: 0,
|
60
|
+
redirect: 0,
|
61
|
+
undo: 0,
|
62
|
+
restore: 0,
|
63
|
+
clear_item: 0,
|
64
|
+
create_item: 0,
|
65
|
+
create_property: 0,
|
66
|
+
create_lexeme: 0
|
38
67
|
}
|
39
68
|
|
40
69
|
# revision ID 0 can never be analyzed, so if revision_ids contains 0, remove it and record it in diffs_not_analyzed
|
@@ -42,24 +71,13 @@ module WikidataDiffAnalyzer
|
|
42
71
|
revision_ids.delete(0)
|
43
72
|
diffs_not_analyzed << 0
|
44
73
|
end
|
45
|
-
# # if mediawiki can be logged in call for 500, otherwise call for 50
|
46
|
-
# if MediawikiLogin.mediawiki_login
|
47
|
-
# puts 'Logged in to mediawiki'
|
48
|
-
# result = LargeBatchesAnalyzer.handle_large_batches(revision_ids, 500)
|
49
|
-
# else
|
50
|
-
# puts 'Not logged in to mediawiki'
|
51
|
-
# result = LargeBatchesAnalyzer.handle_large_batches(revision_ids, 50)
|
52
|
-
# end
|
53
74
|
|
54
75
|
result = LargeBatchesAnalyzer.handle_large_batches(revision_ids, 50)
|
55
|
-
# result is a hash which has contents like this:
|
56
|
-
# result[revid] = { current_content: data[:content], parent_content: parent_content }
|
57
76
|
|
58
77
|
result.each do |revision_id, revision_data|
|
59
78
|
current_content = revision_data[:current_content]
|
60
|
-
|
61
|
-
|
62
|
-
diff = RevisionAnalyzer.analyze_diff(current_content, parent_content)
|
79
|
+
if current_content
|
80
|
+
diff = RevisionAnalyzer.analyze_diff(revision_data)
|
63
81
|
diffs[revision_id] = diff
|
64
82
|
Total.accumulate_totals(diff, total)
|
65
83
|
diffs_analyzed << revision_id
|
@@ -78,22 +96,3 @@ module WikidataDiffAnalyzer
|
|
78
96
|
}
|
79
97
|
end
|
80
98
|
end
|
81
|
-
|
82
|
-
# edge = [0, 123, 456, 1803628651]
|
83
|
-
# revision_ids = [1765004817]
|
84
|
-
# revision_idss = [1780106722, 1903003546, 1902995129, 1596238100, 1898156691]
|
85
|
-
# revisions =[0, 123, 456, 1780106722, 1596238100, 1898156691, 1895908644, 622872009, 1901195499, 1902995129, 1903003546, 1863882476, 535078533]
|
86
|
-
|
87
|
-
# Generate an array of 500 random revision IDs
|
88
|
-
random_revids = Array.new(500) { rand(1_000_000_000..2_000_000_000) }
|
89
|
-
example = [0, 123, 622872009, 1903003546, 1902995129]
|
90
|
-
# Analyze the revisions
|
91
|
-
result = WikidataDiffAnalyzer.analyze(example)
|
92
|
-
puts "final result"
|
93
|
-
puts result[:diffs_analyzed_count] # Prints the count of analyzed diffs
|
94
|
-
puts result[:diffs_not_analyzed] # Prints the list of revision IDs not analyzed
|
95
|
-
puts result[:diffs] # Prints the detailed analysis of each diff (Key is the revision ID)
|
96
|
-
puts result[:total] # Prints the total stats of all diffs
|
97
|
-
|
98
|
-
|
99
|
-
|
data/lib/wikidata/diff/api.rb
CHANGED
@@ -17,12 +17,11 @@ class Api
|
|
17
17
|
prop: 'revisions',
|
18
18
|
revids: revision_ids.join('|'),
|
19
19
|
rvslots: 'main',
|
20
|
-
rvprop: 'content|ids',
|
20
|
+
rvprop: 'content|ids|comment',
|
21
21
|
format: 'json'
|
22
22
|
)
|
23
23
|
|
24
24
|
if response.nil?
|
25
|
-
puts "No response received for revision IDs: #{revision_ids.join(', ')}"
|
26
25
|
return {}
|
27
26
|
end
|
28
27
|
|
@@ -30,31 +29,45 @@ class Api
|
|
30
29
|
|
31
30
|
# checks if it has pages
|
32
31
|
if response.data['pages'].nil?
|
33
|
-
puts "No pages found in the response for revision IDs: #{revision_ids.join(', ')}"
|
34
32
|
return nil
|
35
33
|
end
|
36
34
|
|
37
35
|
response.data['pages'].keys.each do |page|
|
38
36
|
page = response.data['pages'][page]
|
39
37
|
revisions = page['revisions']
|
40
|
-
|
38
|
+
|
41
39
|
revisions.each do |revision|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
if
|
50
|
-
|
40
|
+
content_model = revision['slots']['main']['contentmodel']
|
41
|
+
if content_model == 'wikibase-item' || content_model == 'wikibase-property' || content_model == 'wikibase-lexeme'
|
42
|
+
if revision.key?('texthidden')
|
43
|
+
puts "Content has been hidden or deleted"
|
44
|
+
revid = revision['revid']
|
45
|
+
parentid = revision['parentid']
|
46
|
+
parsed_contents[revid] = { content: nil, comment: nil, parentid: parentid, model: content_model }
|
47
|
+
# checking if comment has been deleted
|
48
|
+
elsif revision.key?('commenthidden')
|
49
|
+
puts "Comment has been hidden or deleted"
|
50
|
+
revid = revision['revid']
|
51
|
+
content = revision['slots']['main']['*']
|
52
|
+
parentid = revision['parentid']
|
53
|
+
parsed_contents[revid] = { content: JSON.parse(content), comment: nil, parentid: parentid, model: content_model }
|
51
54
|
else
|
52
|
-
|
53
|
-
|
55
|
+
content = revision['slots']['main']['*']
|
56
|
+
revid = revision['revid']
|
57
|
+
comment = revision['comment']
|
58
|
+
parentid = revision['parentid']
|
59
|
+
if revid == 0 || revid.nil?
|
60
|
+
parsed_contents[revid] = { content: nil, comment: nil, parentid: nil, model: 'wikibase-item' }
|
61
|
+
else
|
62
|
+
parsed_contents[revid] = { content: JSON.parse(content), comment: comment, parentid: parentid, model: content_model}
|
63
|
+
end
|
54
64
|
end
|
65
|
+
else
|
66
|
+
puts "Content model is #{content_model}"
|
67
|
+
puts "Revision id is #{revision['revid']}"
|
68
|
+
end
|
55
69
|
end
|
56
|
-
|
57
|
-
end
|
70
|
+
end
|
58
71
|
return parsed_contents
|
59
72
|
rescue MediawikiApi::ApiError => e
|
60
73
|
puts "Error retrieving revision content: #{e.message}"
|
@@ -64,4 +77,4 @@ class Api
|
|
64
77
|
raise e
|
65
78
|
end
|
66
79
|
end
|
67
|
-
end
|
80
|
+
end
|
@@ -1,5 +1,8 @@
|
|
1
|
+
require_relative 'reference_analyzer'
|
2
|
+
require_relative 'qualifier_analyzer'
|
3
|
+
|
1
4
|
class ClaimAnalyzer
|
2
|
-
def self.
|
5
|
+
def self.isolate_claims_differences(current_content, parent_content)
|
3
6
|
# Initialize empty arrays to store the added, removed, and changed claims
|
4
7
|
added_claims = []
|
5
8
|
removed_claims = []
|
@@ -11,234 +14,114 @@ class ClaimAnalyzer
|
|
11
14
|
removed_qualifiers = []
|
12
15
|
changed_qualifiers = []
|
13
16
|
|
14
|
-
if
|
15
|
-
|
16
|
-
added_claims: added_claims,
|
17
|
-
removed_claims: removed_claims,
|
18
|
-
changed_claims: changed_claims,
|
19
|
-
added_references: added_references,
|
20
|
-
removed_references: removed_references,
|
21
|
-
changed_references: changed_references,
|
22
|
-
added_qualifiers: added_qualifiers,
|
23
|
-
removed_qualifiers: removed_qualifiers,
|
24
|
-
changed_qualifiers: changed_qualifiers
|
25
|
-
}
|
26
|
-
end
|
27
|
-
# Iterate over each claim key in the current content
|
28
|
-
current_content["claims"].each do |claim_key, current_claims|
|
29
|
-
# Check if the claim key exists in the parent content
|
30
|
-
if parent_content["claims"].key?(claim_key)
|
31
|
-
parent_claims = parent_content["claims"][claim_key]
|
32
|
-
# Iterate over each claim in the current and parent content
|
33
|
-
current_claims.each_with_index do |current_claim, index|
|
34
|
-
parent_claim = parent_claims[index]
|
35
|
-
if parent_claim.nil?
|
36
|
-
# Claim was added
|
37
|
-
added_claims << { key: claim_key, index: index }
|
38
|
-
# check if there's any references or qualifiers in this claim
|
39
|
-
added_references = reference_updates(current_claim, added_references, claim_key, index)
|
40
|
-
added_qualifiers = qualifier_updates(current_claim, added_qualifiers, claim_key, index)
|
41
|
-
|
42
|
-
elsif current_claim != parent_claim
|
43
|
-
# Claim was changed
|
44
|
-
changed_claims << { key: claim_key, index: index }
|
45
|
-
# check if there's any references or qualifiers in this claim
|
46
|
-
changed = handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, index)
|
47
|
-
added_references = changed[:added_references]
|
48
|
-
removed_references = changed[:removed_references]
|
49
|
-
changed_references = changed[:changed_references]
|
50
|
-
changed_qualifiers = handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, index)
|
51
|
-
added_qualifiers = changed_qualifiers[:added_qualifiers]
|
52
|
-
removed_qualifiers = changed_qualifiers[:removed_qualifiers]
|
53
|
-
changed_qualifiers = changed_qualifiers[:changed_qualifiers]
|
54
|
-
end
|
55
|
-
end
|
56
|
-
# Check for removed claims
|
57
|
-
parent_claims.each_with_index do |parent_claim, index|
|
58
|
-
current_claim = current_claims[index]
|
59
|
-
if current_claim.nil?
|
60
|
-
# Claim was removed
|
61
|
-
removed_claims << { key: claim_key, index: index }
|
62
|
-
|
63
|
-
# check if there's any references or qualifiers in this claim
|
64
|
-
removed_references = reference_updates(parent_claim, removed_references, claim_key, index)
|
65
|
-
removed_qualifiers = qualifier_updates(parent_claim, removed_qualifiers, claim_key, index)
|
66
|
-
end
|
67
|
-
end
|
17
|
+
if current_content.nil?
|
18
|
+
current_content_claims = {}
|
68
19
|
else
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
# check if there's any references or qualifiers in this claim
|
73
|
-
added_references = reference_updates(current_claims[index], added_references, claim_key, index)
|
74
|
-
added_qualifiers = qualifier_updates(current_claims[index], added_qualifiers, claim_key, index)
|
20
|
+
current_content_claims = current_content["claims"]
|
21
|
+
if !current_content_claims.is_a?(Hash)
|
22
|
+
current_content_claims = {}
|
75
23
|
end
|
76
24
|
end
|
77
|
-
|
25
|
+
|
78
26
|
|
79
|
-
parent_content
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
removed_references = reference_updates(parent_claims[index], removed_references, claim_key, index)
|
86
|
-
removed_qualifiers = qualifier_updates(parent_claims[index], removed_qualifiers, claim_key, index)
|
27
|
+
if parent_content.nil?
|
28
|
+
parent_content_claims = {}
|
29
|
+
else
|
30
|
+
parent_content_claims = parent_content["claims"]
|
31
|
+
if !parent_content_claims.is_a?(Hash)
|
32
|
+
parent_content_claims = {}
|
87
33
|
end
|
88
34
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
# helper method for adding and removing references
|
116
|
-
def self.reference_updates(claim, updated_references, claim_key, claim_index)
|
117
|
-
if claim["references"]
|
118
|
-
claim["references"].each_with_index do |current_ref, ref_index|
|
119
|
-
updated_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
120
|
-
end
|
121
|
-
end
|
122
|
-
updated_references
|
123
|
-
end
|
124
|
-
|
125
|
-
# helper method for changed references
|
126
|
-
def self.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, claim_index)
|
127
|
-
current_references = current_claim["references"] ? current_claim["references"] : []
|
128
|
-
parent_references = parent_claim["references"] ? parent_claim["references"] : []
|
129
|
-
|
130
|
-
current_references.each_with_index do |current_ref, ref_index|
|
131
|
-
if parent_references.empty?
|
132
|
-
added_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
133
|
-
elsif !parent_references.include?(current_ref)
|
134
|
-
added_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
135
|
-
elsif ref_modified?(current_ref, parent_references)
|
136
|
-
changed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
parent_references.each_with_index do |parent_ref, ref_index|
|
141
|
-
if !current_references.include?(parent_ref)
|
142
|
-
removed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
|
143
|
-
end
|
144
|
-
end
|
35
|
+
|
36
|
+
# if there is no parent content (e.g. parentid is 0), record all current claims as added claims
|
37
|
+
if parent_content.nil?
|
38
|
+
current_content_claims.each do |claim_key, current_claims|
|
39
|
+
current_claims.each_with_index do |current_claim, index|
|
40
|
+
added_claims << { key: claim_key, index: index }
|
41
|
+
# check if there's any references or qualifiers in this claim
|
42
|
+
added_references = ReferenceAnalyzer.reference_updates(current_claim, added_references, claim_key, index)
|
43
|
+
added_qualifiers = QualifierAnalyzer.qualifier_updates(current_claim, added_qualifiers, claim_key, index)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
else
|
47
|
+
# Iterate over each claim key in the current content
|
48
|
+
current_content_claims.each do |claim_key, current_claims|
|
49
|
+
# Check if the claim key exists in the parent content
|
50
|
+
if parent_content_claims.key?(claim_key)
|
51
|
+
parent_claims = parent_content_claims[claim_key]
|
52
|
+
# Iterate over each claim in the current and parent content
|
53
|
+
current_claims.each_with_index do |current_claim, index|
|
54
|
+
parent_claim = parent_claims[index]
|
55
|
+
if parent_claim.nil?
|
56
|
+
# Claim was added
|
57
|
+
added_claims << { key: claim_key, index: index }
|
58
|
+
# check if there's any references or qualifiers in this claim
|
59
|
+
added_references = ReferenceAnalyzer.reference_updates(current_claim, added_references, claim_key, index)
|
60
|
+
added_qualifiers = QualifierAnalyzer.qualifier_updates(current_claim, added_qualifiers, claim_key, index)
|
145
61
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
end
|
62
|
+
elsif current_claim != parent_claim
|
63
|
+
# Claim was changed
|
64
|
+
changed_claims << { key: claim_key, index: index }
|
65
|
+
# check if there's any references or qualifiers in this claim
|
66
|
+
changed_references_hash = ReferenceAnalyzer.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, index)
|
152
67
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
if current_reference["snaks"] != parent_reference["snaks"]
|
157
|
-
return true
|
158
|
-
end
|
159
|
-
end
|
160
|
-
false
|
161
|
-
end
|
68
|
+
added_references = changed_references_hash[:added_references]
|
69
|
+
removed_references = changed_references_hash[:removed_references]
|
70
|
+
changed_references = changed_references_hash[:changed_references]
|
162
71
|
|
163
|
-
|
164
|
-
# handles added and removed qualifiers
|
165
|
-
def self.qualifier_updates(claim, updated_qualifiers, claim_key, claim_index)
|
166
|
-
if claim["qualifiers"]
|
167
|
-
qualifiers = claim["qualifiers"]
|
168
|
-
qualifiers.each do |qualifier_key, qualifier_values|
|
169
|
-
qualifier_values.each_with_index do |qualifier_value, qualifier_index|
|
170
|
-
updated_qualifiers << {
|
171
|
-
claim_key: claim_key,
|
172
|
-
claim_index: claim_index,
|
173
|
-
qualifier_key: qualifier_key,
|
174
|
-
qualifier_index: qualifier_index
|
175
|
-
}
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
updated_qualifiers
|
180
|
-
end
|
72
|
+
changed_qualifiers_hash = QualifierAnalyzer.handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, index)
|
181
73
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
74
|
+
added_qualifiers = changed_qualifiers_hash[:added_qualifiers]
|
75
|
+
removed_qualifiers = changed_qualifiers_hash[:removed_qualifiers]
|
76
|
+
changed_qualifiers = changed_qualifiers_hash[:changed_qualifiers]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
# Check for removed claims
|
80
|
+
parent_claims.each_with_index do |parent_claim, index|
|
81
|
+
current_claim = current_claims[index]
|
82
|
+
if current_claim.nil?
|
83
|
+
# Claim was removed
|
84
|
+
removed_claims << { key: claim_key, index: index }
|
186
85
|
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
# Check if the qualifier index exists in the parent content
|
193
|
-
if !parent.nil?
|
194
|
-
parent = parent[qualifier_index]
|
195
|
-
# check if the parent claim was changed by comparing the objects first
|
196
|
-
if parent != qualifier_value
|
197
|
-
# Claim was changed
|
198
|
-
changed_qualifiers << {
|
199
|
-
claim_key: claim_key,
|
200
|
-
claim_index: claim_index,
|
201
|
-
qualifier_key: qualifier_key,
|
202
|
-
qualifier_index: qualifier_index
|
203
|
-
}
|
204
|
-
end
|
86
|
+
# check if there's any references or qualifiers in this claim
|
87
|
+
removed_references = ReferenceAnalyzer.reference_updates(parent_claim, removed_references, claim_key, index)
|
88
|
+
removed_qualifiers = QualifierAnalyzer.qualifier_updates(parent_claim, removed_qualifiers, claim_key, index)
|
89
|
+
end
|
90
|
+
end
|
205
91
|
else
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
# Check for removed claims
|
217
|
-
parent_qualifiers.each do |qualifier_key, qualifier_values|
|
218
|
-
qualifier_values.each_with_index do |qualifier_value, qualifier_index|
|
219
|
-
if current_qualifiers.key?(qualifier_key)
|
220
|
-
current = current_qualifiers[qualifier_key]
|
92
|
+
# All claims in current content with this key were added
|
93
|
+
current_claims.each_index do |index|
|
94
|
+
added_claims << { key: claim_key, index: index }
|
95
|
+
# check if there's any references or qualifiers in this claim
|
96
|
+
added_references = ReferenceAnalyzer.reference_updates(current_claims[index], added_references, claim_key, index)
|
97
|
+
added_qualifiers = QualifierAnalyzer.qualifier_updates(current_claims[index], added_qualifiers, claim_key, index)
|
98
|
+
end
|
221
99
|
end
|
222
|
-
# Check if the qualifier index exists in the current content
|
223
|
-
if !current.nil?
|
224
|
-
current = current[qualifier_index]
|
225
100
|
end
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
101
|
+
|
102
|
+
parent_content_claims.each do |claim_key, parent_claims|
|
103
|
+
# current content[claims] can be nil
|
104
|
+
parent_claims.each_index do |index|
|
105
|
+
if current_content_claims.nil? || !current_content_claims.key?(claim_key)
|
106
|
+
removed_claims << { key: claim_key, index: index }
|
107
|
+
# check if there's any references or qualifiers in this claim
|
108
|
+
removed_references = ReferenceAnalyzer.reference_updates(parent_claims[index], removed_references, claim_key, index)
|
109
|
+
removed_qualifiers = QualifierAnalyzer.qualifier_updates(parent_claims[index], removed_qualifiers, claim_key, index)
|
110
|
+
end
|
111
|
+
end
|
234
112
|
end
|
235
113
|
end
|
236
|
-
end
|
237
114
|
|
238
115
|
{
|
239
|
-
|
240
|
-
|
241
|
-
|
116
|
+
added_claims: added_claims,
|
117
|
+
removed_claims: removed_claims,
|
118
|
+
changed_claims: changed_claims,
|
119
|
+
added_references: added_references,
|
120
|
+
removed_references: removed_references,
|
121
|
+
changed_references: changed_references,
|
122
|
+
added_qualifiers: added_qualifiers,
|
123
|
+
removed_qualifiers: removed_qualifiers,
|
124
|
+
changed_qualifiers: changed_qualifiers
|
242
125
|
}
|
243
126
|
end
|
244
127
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class CommentAnalyzer
|
2
|
+
def self.isolate_comment_differences(comment)
|
3
|
+
phrases = {
|
4
|
+
'merge_to': 0,
|
5
|
+
'merge_from': 0,
|
6
|
+
'redirect': 0,
|
7
|
+
'undo': 0,
|
8
|
+
'restore': 0,
|
9
|
+
'clear_item': 0,
|
10
|
+
'create_item': 0,
|
11
|
+
}
|
12
|
+
|
13
|
+
if comment.nil?
|
14
|
+
return phrases
|
15
|
+
end
|
16
|
+
|
17
|
+
if comment.include?('wbmergeitems-from')
|
18
|
+
phrases[:merge_from] = 1
|
19
|
+
end
|
20
|
+
|
21
|
+
if comment.include?('wbmergeitems-to')
|
22
|
+
phrases[:merge_to] = 1
|
23
|
+
end
|
24
|
+
|
25
|
+
if comment.include?('wbcreateredirect')
|
26
|
+
phrases[:redirect] = 1
|
27
|
+
end
|
28
|
+
|
29
|
+
if comment.include?('undo:')
|
30
|
+
phrases[:undo] = 1
|
31
|
+
end
|
32
|
+
|
33
|
+
if comment.include?('restore:')
|
34
|
+
phrases[:restore] = 1
|
35
|
+
end
|
36
|
+
|
37
|
+
if comment.include?('wbeditentity-override')
|
38
|
+
phrases[:clear_item] = 1
|
39
|
+
end
|
40
|
+
|
41
|
+
# create-property, create-item, and create-lexeme comments all include this phrase,
|
42
|
+
# so the revision analyzer decides which one it is based on the content model
|
43
|
+
if comment.include?('wbeditentity-create')
|
44
|
+
phrases[:create_item] = 1
|
45
|
+
end
|
46
|
+
|
47
|
+
return phrases
|
48
|
+
end
|
49
|
+
end
|