wikidata-diff-analyzer 0.1.1 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile +0 -2
- data/README.md +268 -105
- data/lib/wikidata/diff/alias_analyzer.rb +69 -52
- data/lib/wikidata/diff/api.rb +31 -18
- data/lib/wikidata/diff/claim_analyzer.rb +94 -211
- data/lib/wikidata/diff/comment_analyzer.rb +49 -0
- data/lib/wikidata/diff/description_analyzer.rb +57 -35
- data/lib/wikidata/diff/form_analyzer.rb +67 -0
- data/lib/wikidata/diff/gloss_analyzer.rb +71 -0
- data/lib/wikidata/diff/inside_claim_analyzer.rb +84 -0
- data/lib/wikidata/diff/label_analyzer.rb +63 -41
- data/lib/wikidata/diff/large_batches_analyzer.rb +39 -24
- data/lib/wikidata/diff/lemma_analyzer.rb +70 -0
- data/lib/wikidata/diff/qualifier_analyzer.rb +83 -0
- data/lib/wikidata/diff/reference_analyzer.rb +49 -0
- data/lib/wikidata/diff/representation_analyzer.rb +71 -0
- data/lib/wikidata/diff/revision_analyzer.rb +153 -37
- data/lib/wikidata/diff/sense_analyzer.rb +106 -0
- data/lib/wikidata/diff/sitelink_analyzer.rb +3 -7
- data/lib/wikidata/diff/total.rb +31 -0
- data/lib/wikidata-diff-analyzer/version.rb +5 -0
- data/lib/{wikidata/diff/analyzer.rb → wikidata-diff-analyzer.rb} +36 -21
- data/wikidata-diff-analyzer.gemspec +14 -8
- metadata +71 -9
- data/CODE_OF_CONDUCT.md +0 -84
- data/Gemfile.lock +0 -81
- data/lib/wikidata/diff/analyzer/version.rb +0 -9
- data/lib/wikidata/diff/mediawiki_login.rb +0 -12
- data/sig/wikidata/diff/analyzer.rbs +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e2582edb55fb495d5256a573a6431eaf2ea628872d85a21591badd0d16ad528b
|
4
|
+
data.tar.gz: ac0c0dd70e982f169b6ac52dc882c7f1fbe8c632868ba48368b53f10e530ad60
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 554b47cea6fdf10c41760e19c34102ca12607b26e1a09aad9fa82ce595b19593cacac9feae95da553c1a6afb6c62a75cd6ddf73cf5ff5683f36f3e217f0b3ff7
|
7
|
+
data.tar.gz: 2dcbd83e6f1a93b1bd3ccfeb7d54d81bd89f4f88c290a1f9384658f2291392fa6b4873f44c82bb2240a8fa5b153334fd48b0656e611cb23187d2ecde2ae25192
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# WikidataDiffAnalyzer
|
2
2
|
|
3
|
-
Welcome to WikidataDiffAnalyzer
|
3
|
+
Welcome to WikidataDiffAnalyzer! The WikidataDiffAnalyzer is a Ruby gem that provides functionality to parse the differences between Wikidata revisions and extract statistics about the changes. It enables accurate analysis of Wikidata edits, such as counting the number of claims, qualifiers, references, aliases, labels, descriptions and site links added, removed, and changed. The stats for merge-to, merge-from, redirect, restore, undo and item-clearing are also returned. This gem has been developed to enhance Wikidata statistics on the Wiki Education Dashboard and Programs & Events Dashboard, but it can be utilized for various other purposes as well.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -16,11 +16,12 @@ Alternatively, you can install it directly via:
|
|
16
16
|
## Usage
|
17
17
|
The main method of this gem is `WikidataDiffAnalyzer.analyze`, which receives an array of revision IDs and provides a comprehensive analysis of the differences among them.
|
18
18
|
|
19
|
-
**The input should be an array of integers**. Only the valid revision ids containing wikidata-item revisions
|
19
|
+
**The input should be an array of integers**. Only valid revision IDs (including first revision IDs) that belong to wikidata-item, wikidata-lexeme, or wikidata-property revisions will be analyzed. Other revision IDs (such as invalid revision IDs, revision IDs whose parent revision has been deleted, and revision IDs of wikitext revisions) will be returned as not analyzed in this version.
|
20
20
|
|
21
|
+
Let's look at an example with some real revision ids -
|
21
22
|
You can look at the HTML version of the difference between the edits with their parent revision below:
|
22
23
|
- 0 (does not exist)
|
23
|
-
- [123](https://www.wikidata.org/w/index.php?&diff=123) (
|
24
|
+
- [123](https://www.wikidata.org/w/index.php?&diff=123) (first revision; everything in it is counted as added)
|
24
25
|
- [622872009](https://www.wikidata.org/w/index.php?&diff=622872009) (added 1 claim)
|
25
26
|
- [1902995129](https://www.wikidata.org/w/index.php?&diff=1902995129) (removed 1 claim, 1 reference, and 1 qualifier)
|
26
27
|
- [1903003546](https://www.wikidata.org/w/index.php?&diff=1903003546) (changed 1 claim, added 1 qualifier)
|
@@ -39,9 +40,9 @@ The output is a hash including the information below:
|
|
39
40
|
```
|
40
41
|
{
|
41
42
|
diffs_analyzed_count: 4,
|
42
|
-
diffs_not_analyzed: [0
|
43
|
-
diffs: {
|
44
|
-
total: {claims_added:
|
43
|
+
diffs_not_analyzed: [0],
|
44
|
+
diffs: {123: {...}, 622872009: {...}, 1903003546: {...}, 1902995129: {...}},
|
45
|
+
total: {claims_added: 1, claims_removed: 1, claims_changed: 1 ...}
|
45
46
|
}
|
46
47
|
|
47
48
|
```
|
@@ -57,109 +58,271 @@ puts result[:total] # Prints the total stats of all diffs
|
|
57
58
|
Here's the full output structure:
|
58
59
|
```
|
59
60
|
# the count of analyzed diffs
|
60
|
-
|
61
|
+
4
|
61
62
|
# the list of revision IDs not analyzed
|
62
63
|
0
|
63
|
-
123
|
64
64
|
# the detailed analysis of each diff (Key is the revision ID)
|
65
|
-
{
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
:
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
65
|
+
{123=>
|
66
|
+
{:added_claims=>0,
|
67
|
+
:removed_claims=>0,
|
68
|
+
:changed_claims=>0,
|
69
|
+
:added_references=>0,
|
70
|
+
:removed_references=>0,
|
71
|
+
:changed_references=>0,
|
72
|
+
:added_qualifiers=>0,
|
73
|
+
:removed_qualifiers=>0,
|
74
|
+
:changed_qualifiers=>0,
|
75
|
+
:added_aliases=>0,
|
76
|
+
:removed_aliases=>0,
|
77
|
+
:changed_aliases=>0,
|
78
|
+
:added_labels=>1,
|
79
|
+
:removed_labels=>0,
|
80
|
+
:changed_labels=>0,
|
81
|
+
:added_descriptions=>1,
|
82
|
+
:removed_descriptions=>0,
|
83
|
+
:changed_descriptions=>0,
|
84
|
+
:added_sitelinks=>0,
|
85
|
+
:removed_sitelinks=>0,
|
86
|
+
:changed_sitelinks=>0,
|
87
|
+
:merge_to=>0,
|
88
|
+
:merge_from=>0,
|
89
|
+
:redirect=>0,
|
90
|
+
:undo=>0,
|
91
|
+
:restore=>0,
|
92
|
+
:clear_item=>0,
|
93
|
+
:create_item=>0,
|
94
|
+
:added_lemmas=>0,
|
95
|
+
:removed_lemmas=>0,
|
96
|
+
:changed_lemmas=>0,
|
97
|
+
:added_forms=>0,
|
98
|
+
:removed_forms=>0,
|
99
|
+
:changed_forms=>0,
|
100
|
+
:added_senses=>0,
|
101
|
+
:removed_senses=>0,
|
102
|
+
:changed_senses=>0,
|
103
|
+
:create_property=>0,
|
104
|
+
:create_lexeme=>0,
|
105
|
+
:added_representations=>0,
|
106
|
+
:removed_representations=>0,
|
107
|
+
:changed_representations=>0,
|
108
|
+
:added_glosses=>0,
|
109
|
+
:removed_glosses=>0,
|
110
|
+
:changed_glosses=>0,
|
111
|
+
:added_formclaims=>0,
|
112
|
+
:removed_formclaims=>0,
|
113
|
+
:changed_formclaims=>0,
|
114
|
+
:added_senseclaims=>0,
|
115
|
+
:removed_senseclaims=>0,
|
116
|
+
:changed_senseclaims=>0},
|
117
|
+
622872009=>
|
118
|
+
{:added_claims=>1,
|
119
|
+
:removed_claims=>0,
|
120
|
+
:changed_claims=>0,
|
121
|
+
:added_references=>0,
|
122
|
+
:removed_references=>0,
|
123
|
+
:changed_references=>0,
|
124
|
+
:added_qualifiers=>0,
|
125
|
+
:removed_qualifiers=>0,
|
126
|
+
:changed_qualifiers=>0,
|
127
|
+
:added_aliases=>0,
|
128
|
+
:removed_aliases=>0,
|
129
|
+
:changed_aliases=>0,
|
130
|
+
:added_labels=>0,
|
131
|
+
:removed_labels=>0,
|
132
|
+
:changed_labels=>0,
|
133
|
+
:added_descriptions=>0,
|
134
|
+
:removed_descriptions=>0,
|
135
|
+
:changed_descriptions=>0,
|
136
|
+
:added_sitelinks=>0,
|
137
|
+
:removed_sitelinks=>0,
|
138
|
+
:changed_sitelinks=>0,
|
139
|
+
:merge_to=>0,
|
140
|
+
:merge_from=>0,
|
141
|
+
:redirect=>0,
|
142
|
+
:undo=>0,
|
143
|
+
:restore=>0,
|
144
|
+
:clear_item=>0,
|
145
|
+
:create_item=>0,
|
146
|
+
:added_lemmas=>0,
|
147
|
+
:removed_lemmas=>0,
|
148
|
+
:changed_lemmas=>0,
|
149
|
+
:added_forms=>0,
|
150
|
+
:removed_forms=>0,
|
151
|
+
:changed_forms=>0,
|
152
|
+
:added_senses=>0,
|
153
|
+
:removed_senses=>0,
|
154
|
+
:changed_senses=>0,
|
155
|
+
:create_property=>0,
|
156
|
+
:create_lexeme=>0,
|
157
|
+
:added_representations=>0,
|
158
|
+
:removed_representations=>0,
|
159
|
+
:changed_representations=>0,
|
160
|
+
:added_glosses=>0,
|
161
|
+
:removed_glosses=>0,
|
162
|
+
:changed_glosses=>0,
|
163
|
+
:added_formclaims=>0,
|
164
|
+
:removed_formclaims=>0,
|
165
|
+
:changed_formclaims=>0,
|
166
|
+
:added_senseclaims=>0,
|
167
|
+
:removed_senseclaims=>0,
|
168
|
+
:changed_senseclaims=>0},
|
169
|
+
1902995129=>
|
170
|
+
{:added_claims=>0,
|
171
|
+
:removed_claims=>1,
|
172
|
+
:changed_claims=>0,
|
173
|
+
:added_references=>0,
|
174
|
+
:removed_references=>1,
|
175
|
+
:changed_references=>0,
|
176
|
+
:added_qualifiers=>0,
|
177
|
+
:removed_qualifiers=>1,
|
178
|
+
:changed_qualifiers=>0,
|
179
|
+
:added_aliases=>0,
|
180
|
+
:removed_aliases=>0,
|
181
|
+
:changed_aliases=>0,
|
182
|
+
:added_labels=>0,
|
183
|
+
:removed_labels=>0,
|
184
|
+
:changed_labels=>0,
|
185
|
+
:added_descriptions=>0,
|
186
|
+
:removed_descriptions=>0,
|
187
|
+
:changed_descriptions=>0,
|
188
|
+
:added_sitelinks=>0,
|
189
|
+
:removed_sitelinks=>0,
|
190
|
+
:changed_sitelinks=>0,
|
191
|
+
:merge_to=>0,
|
192
|
+
:merge_from=>0,
|
193
|
+
:redirect=>0,
|
194
|
+
:undo=>0,
|
195
|
+
:restore=>0,
|
196
|
+
:clear_item=>0,
|
197
|
+
:create_item=>0,
|
198
|
+
:added_lemmas=>0,
|
199
|
+
:removed_lemmas=>0,
|
200
|
+
:changed_lemmas=>0,
|
201
|
+
:added_forms=>0,
|
202
|
+
:removed_forms=>0,
|
203
|
+
:changed_forms=>0,
|
204
|
+
:added_senses=>0,
|
205
|
+
:removed_senses=>0,
|
206
|
+
:changed_senses=>0,
|
207
|
+
:create_property=>0,
|
208
|
+
:create_lexeme=>0,
|
209
|
+
:added_representations=>0,
|
210
|
+
:removed_representations=>0,
|
211
|
+
:changed_representations=>0,
|
212
|
+
:added_glosses=>0,
|
213
|
+
:removed_glosses=>0,
|
214
|
+
:changed_glosses=>0,
|
215
|
+
:added_formclaims=>0,
|
216
|
+
:removed_formclaims=>0,
|
217
|
+
:changed_formclaims=>0,
|
218
|
+
:added_senseclaims=>0,
|
219
|
+
:removed_senseclaims=>0,
|
220
|
+
:changed_senseclaims=>0},
|
221
|
+
1903003546=>
|
222
|
+
{:added_claims=>0,
|
223
|
+
:removed_claims=>0,
|
224
|
+
:changed_claims=>1,
|
225
|
+
:added_references=>0,
|
226
|
+
:removed_references=>0,
|
227
|
+
:changed_references=>0,
|
228
|
+
:added_qualifiers=>1,
|
229
|
+
:removed_qualifiers=>0,
|
230
|
+
:changed_qualifiers=>0,
|
231
|
+
:added_aliases=>0,
|
232
|
+
:removed_aliases=>0,
|
233
|
+
:changed_aliases=>0,
|
234
|
+
:added_labels=>0,
|
235
|
+
:removed_labels=>0,
|
236
|
+
:changed_labels=>0,
|
237
|
+
:added_descriptions=>0,
|
238
|
+
:removed_descriptions=>0,
|
239
|
+
:changed_descriptions=>0,
|
240
|
+
:added_sitelinks=>0,
|
241
|
+
:removed_sitelinks=>0,
|
242
|
+
:changed_sitelinks=>0,
|
243
|
+
:merge_to=>0,
|
244
|
+
:merge_from=>0,
|
245
|
+
:redirect=>0,
|
246
|
+
:undo=>0,
|
247
|
+
:restore=>0,
|
248
|
+
:clear_item=>0,
|
249
|
+
:create_item=>0,
|
250
|
+
:added_lemmas=>0,
|
251
|
+
:removed_lemmas=>0,
|
252
|
+
:changed_lemmas=>0,
|
253
|
+
:added_forms=>0,
|
254
|
+
:removed_forms=>0,
|
255
|
+
:changed_forms=>0,
|
256
|
+
:added_senses=>0,
|
257
|
+
:removed_senses=>0,
|
258
|
+
:changed_senses=>0,
|
259
|
+
:create_property=>0,
|
260
|
+
:create_lexeme=>0,
|
261
|
+
:added_representations=>0,
|
262
|
+
:removed_representations=>0,
|
263
|
+
:changed_representations=>0,
|
264
|
+
:added_glosses=>0,
|
265
|
+
:removed_glosses=>0,
|
266
|
+
:changed_glosses=>0,
|
267
|
+
:added_formclaims=>0,
|
268
|
+
:removed_formclaims=>0,
|
269
|
+
:changed_formclaims=>0,
|
270
|
+
:added_senseclaims=>0,
|
271
|
+
:removed_senseclaims=>0,
|
272
|
+
:changed_senseclaims=>0}}
|
139
273
|
# the total stats of all diffs
|
140
|
-
|
141
|
-
:claims_added=>1,
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
274
|
+
:total=>
|
275
|
+
{:claims_added=>1,
|
276
|
+
:claims_removed=>1,
|
277
|
+
:claims_changed=>1,
|
278
|
+
:references_added=>0,
|
279
|
+
:references_removed=>1,
|
280
|
+
:references_changed=>0,
|
281
|
+
:qualifiers_added=>1,
|
282
|
+
:qualifiers_removed=>1,
|
283
|
+
:qualifiers_changed=>0,
|
284
|
+
:aliases_added=>0,
|
285
|
+
:aliases_removed=>0,
|
286
|
+
:aliases_changed=>0,
|
287
|
+
:labels_added=>1,
|
288
|
+
:labels_removed=>0,
|
289
|
+
:labels_changed=>0,
|
290
|
+
:descriptions_added=>1,
|
291
|
+
:descriptions_removed=>0,
|
292
|
+
:descriptions_changed=>0,
|
293
|
+
:sitelinks_added=>0,
|
294
|
+
:sitelinks_removed=>0,
|
295
|
+
:sitelinks_changed=>0,
|
296
|
+
:lemmas_added=>0,
|
297
|
+
:lemmas_removed=>0,
|
298
|
+
:lemmas_changed=>0,
|
299
|
+
:forms_added=>0,
|
300
|
+
:forms_removed=>0,
|
301
|
+
:forms_changed=>0,
|
302
|
+
:representations_added=>0,
|
303
|
+
:representations_removed=>0,
|
304
|
+
:representations_changed=>0,
|
305
|
+
:formclaims_added=>0,
|
306
|
+
:formclaims_removed=>0,
|
307
|
+
:formclaims_changed=>0,
|
308
|
+
:senses_added=>0,
|
309
|
+
:senses_removed=>0,
|
310
|
+
:senses_changed=>0,
|
311
|
+
:glosses_added=>0,
|
312
|
+
:glosses_removed=>0,
|
313
|
+
:glosses_changed=>0,
|
314
|
+
:senseclaims_added=>0,
|
315
|
+
:senseclaims_removed=>0,
|
316
|
+
:senseclaims_changed=>0,
|
317
|
+
:merge_to=>0,
|
318
|
+
:merge_from=>0,
|
319
|
+
:redirect=>0,
|
320
|
+
:undo=>0,
|
321
|
+
:restore=>0,
|
322
|
+
:clear_item=>0,
|
323
|
+
:create_item=>0,
|
324
|
+
:create_property=>0,
|
325
|
+
:create_lexeme=>0}
|
163
326
|
|
164
327
|
|
165
328
|
```
|
@@ -1,71 +1,88 @@
|
|
1
1
|
class AliasAnalyzer
|
2
2
|
def self.isolate_aliases_differences(current_content, parent_content)
|
3
3
|
return {
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
changed_aliases: [],
|
5
|
+
removed_aliases: [],
|
6
|
+
added_aliases: []
|
7
7
|
} if current_content.nil? && parent_content.nil?
|
8
|
-
|
9
|
-
current_aliases = current_content['aliases'] || {}
|
10
|
-
parent_aliases = parent_content['aliases'] || {}
|
11
8
|
|
12
9
|
changed_aliases = []
|
13
10
|
removed_aliases = []
|
14
11
|
added_aliases = []
|
15
|
-
|
16
|
-
if
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
12
|
+
|
13
|
+
if current_content
|
14
|
+
current_aliases = current_content['aliases']
|
15
|
+
if current_aliases.nil? || current_aliases.is_a?(Array)
|
16
|
+
current_aliases = {}
|
17
|
+
end
|
18
|
+
else
|
19
|
+
current_aliases = {}
|
22
20
|
end
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
# Check if the language exists in the parent aliases
|
29
|
-
if parent_aliases_arr
|
30
|
-
# Ensure that current_aliases_arr is always an array
|
31
|
-
current_aliases_arr = [current_aliases_arr] unless current_aliases_arr.is_a?(Array)
|
32
|
-
|
33
|
-
current_aliases_arr.each_with_index do |current_alias, index|
|
34
|
-
parent_alias = parent_aliases_arr[index]
|
35
|
-
if parent_alias.nil?
|
36
|
-
added_aliases << { lang: lang, index: index }
|
37
|
-
elsif current_alias != parent_alias
|
38
|
-
changed_aliases << { lang: lang, index: index }
|
21
|
+
|
22
|
+
if parent_content
|
23
|
+
parent_aliases = parent_content['aliases']
|
24
|
+
if parent_aliases.nil? || parent_aliases.is_a?(Array)
|
25
|
+
parent_aliases = {}
|
39
26
|
end
|
27
|
+
else
|
28
|
+
parent_aliases = {}
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
if parent_content.nil?
|
33
|
+
(current_aliases || {}).each do |lang, current_aliases_arr|
|
34
|
+
current_aliases_arr = [current_aliases_arr] unless current_aliases_arr.is_a?(Array)
|
35
|
+
|
36
|
+
current_aliases_arr.each_with_index do |current_alias, index|
|
37
|
+
added_aliases << { lang: lang, index: index }
|
38
|
+
end
|
40
39
|
end
|
41
40
|
else
|
42
|
-
#
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
41
|
+
# Iterate over each language in the current aliases
|
42
|
+
(current_aliases || {}).each do |lang, current_aliases_arr|
|
43
|
+
parent_aliases_arr = parent_aliases[lang]
|
44
|
+
|
45
|
+
# Check if the language exists in the parent aliases
|
46
|
+
if parent_aliases_arr
|
47
|
+
# Ensure that current_aliases_arr is always an array
|
48
|
+
current_aliases_arr = [current_aliases_arr] unless current_aliases_arr.is_a?(Array)
|
49
|
+
|
50
|
+
current_aliases_arr.each_with_index do |current_alias, index|
|
51
|
+
parent_alias = parent_aliases_arr[index]
|
52
|
+
if parent_alias.nil?
|
53
|
+
added_aliases << { lang: lang, index: index }
|
54
|
+
elsif current_alias != parent_alias
|
55
|
+
changed_aliases << { lang: lang, index: index }
|
56
|
+
end
|
57
|
+
end
|
58
|
+
else
|
59
|
+
# Ensure that current_aliases_arr is always an array
|
60
|
+
current_aliases_arr = [current_aliases_arr] unless current_aliases_arr.is_a?(Array)
|
61
|
+
|
62
|
+
current_aliases_arr.each_with_index do |current_alias, index|
|
63
|
+
added_aliases << { lang: lang, index: index }
|
64
|
+
end
|
65
|
+
end
|
47
66
|
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
67
|
+
|
68
|
+
# Iterate over each language in the parent aliases to find removed aliases
|
69
|
+
(parent_aliases || {}).each do |lang, parent_aliases_arr|
|
70
|
+
# Ensure that parent_aliases_arr is always an array
|
71
|
+
parent_aliases_arr = [parent_aliases_arr] unless parent_aliases_arr.is_a?(Array)
|
72
|
+
|
73
|
+
current_aliases_arr = current_aliases[lang]
|
74
|
+
|
75
|
+
if current_aliases_arr.nil?
|
76
|
+
parent_aliases_arr.each_index do |index|
|
77
|
+
removed_aliases << { lang: lang, index: index }
|
78
|
+
end
|
79
|
+
end
|
61
80
|
end
|
62
81
|
end
|
63
|
-
end
|
64
|
-
|
65
82
|
{
|
66
|
-
|
67
|
-
|
68
|
-
|
83
|
+
changed_aliases: changed_aliases,
|
84
|
+
removed_aliases: removed_aliases,
|
85
|
+
added_aliases: added_aliases
|
69
86
|
}
|
70
87
|
end
|
71
88
|
end
|
data/lib/wikidata/diff/api.rb
CHANGED
@@ -17,12 +17,11 @@ class Api
|
|
17
17
|
prop: 'revisions',
|
18
18
|
revids: revision_ids.join('|'),
|
19
19
|
rvslots: 'main',
|
20
|
-
rvprop: 'content|ids',
|
20
|
+
rvprop: 'content|ids|comment',
|
21
21
|
format: 'json'
|
22
22
|
)
|
23
23
|
|
24
24
|
if response.nil?
|
25
|
-
puts "No response received for revision IDs: #{revision_ids.join(', ')}"
|
26
25
|
return {}
|
27
26
|
end
|
28
27
|
|
@@ -30,31 +29,45 @@ class Api
|
|
30
29
|
|
31
30
|
# checks if it has pages
|
32
31
|
if response.data['pages'].nil?
|
33
|
-
puts "No pages found in the response for revision IDs: #{revision_ids.join(', ')}"
|
34
32
|
return nil
|
35
33
|
end
|
36
34
|
|
37
35
|
response.data['pages'].keys.each do |page|
|
38
36
|
page = response.data['pages'][page]
|
39
37
|
revisions = page['revisions']
|
40
|
-
|
38
|
+
|
41
39
|
revisions.each do |revision|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
if
|
50
|
-
|
40
|
+
content_model = revision['slots']['main']['contentmodel']
|
41
|
+
if content_model == 'wikibase-item' || content_model == 'wikibase-property' || content_model == 'wikibase-lexeme'
|
42
|
+
if revision.key?('texthidden')
|
43
|
+
puts "Content has been hidden or deleted"
|
44
|
+
revid = revision['revid']
|
45
|
+
parentid = revision['parentid']
|
46
|
+
parsed_contents[revid] = { content: nil, comment: nil, parentid: parentid, model: content_model }
|
47
|
+
# checking if comment has been deleted
|
48
|
+
elsif revision.key?('commenthidden')
|
49
|
+
puts "Comment has been hidden or deleted"
|
50
|
+
revid = revision['revid']
|
51
|
+
content = revision['slots']['main']['*']
|
52
|
+
parentid = revision['parentid']
|
53
|
+
parsed_contents[revid] = { content: JSON.parse(content), comment: nil, parentid: parentid, model: content_model }
|
51
54
|
else
|
52
|
-
|
53
|
-
|
55
|
+
content = revision['slots']['main']['*']
|
56
|
+
revid = revision['revid']
|
57
|
+
comment = revision['comment']
|
58
|
+
parentid = revision['parentid']
|
59
|
+
if revid == 0 || revid.nil?
|
60
|
+
parsed_contents[revid] = { content: nil, comment: nil, parentid: nil, model: 'wikibase-item' }
|
61
|
+
else
|
62
|
+
parsed_contents[revid] = { content: JSON.parse(content), comment: comment, parentid: parentid, model: content_model}
|
63
|
+
end
|
54
64
|
end
|
65
|
+
else
|
66
|
+
puts "Content model is #{content_model}"
|
67
|
+
puts "Revision id is #{revision['revid']}"
|
68
|
+
end
|
55
69
|
end
|
56
|
-
|
57
|
-
end
|
70
|
+
end
|
58
71
|
return parsed_contents
|
59
72
|
rescue MediawikiApi::ApiError => e
|
60
73
|
puts "Error retrieving revision content: #{e.message}"
|
@@ -64,4 +77,4 @@ class Api
|
|
64
77
|
raise e
|
65
78
|
end
|
66
79
|
end
|
67
|
-
end
|
80
|
+
end
|