wikidata-diff-analyzer 0.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,49 +1,71 @@
1
# Compares the 'descriptions' entry of two revision content hashes.
class DescriptionAnalyzer
  # Reports, per language key, which descriptions were added, removed or
  # changed between a parent revision and the current revision.
  #
  # @param current_content [Hash, nil] content of the current revision
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current description is "added"
  # @return [Hash] :changed_descriptions, :removed_descriptions and
  #   :added_descriptions, each an Array of `{ lang: <language key> }`
  def self.isolate_descriptions_differences(current_content, parent_content)
    current_descriptions = descriptions_of(current_content)
    parent_descriptions = descriptions_of(parent_content)

    added_descriptions = []
    changed_descriptions = []
    current_descriptions.each do |lang, current_description|
      parent_description = parent_descriptions[lang]
      if parent_description.nil?
        added_descriptions << { lang: lang }
      elsif current_description != parent_description
        changed_descriptions << { lang: lang }
      end
    end

    # A language is removed when the parent has it and the current revision
    # does not, even if other languages are still present.
    removed_descriptions = parent_descriptions.keys
                                              .select { |lang| current_descriptions[lang].nil? }
                                              .map { |lang| { lang: lang } }

    {
      changed_descriptions: changed_descriptions,
      removed_descriptions: removed_descriptions,
      added_descriptions: added_descriptions
    }
  end

  # Returns the descriptions Hash of +content+, or {} when the content is nil
  # or the entry is missing / not a Hash.
  # NOTE(review): an empty set presumably arrives as an Array ([]) from the
  # upstream JSON, which is why non-Hash values are treated as empty - confirm.
  def self.descriptions_of(content)
    descriptions = content && content['descriptions']
    descriptions.is_a?(Hash) ? descriptions : {}
  end
  private_class_method :descriptions_of
end
@@ -0,0 +1,67 @@
1
+ require_relative 'representation_analyzer'
2
+ require_relative 'inside_claim_analyzer'
3
# Compares the "forms" list of two revision content hashes, position by
# position, and aggregates the nested representation and claim differences
# reported by RepresentationAnalyzer and InsideClaimAnalyzer.
class FormAnalyzer
  # @param current_content [Hash, nil] content of the current revision
  # @param parent_content [Hash, nil] content of the parent revision
  # @return [Hash] :added_forms / :removed_forms / :changed_forms (Arrays of
  #   `{ index: <position> }`) plus the concatenated :added / :removed /
  #   :changed results of the nested analyzers under the *_representations
  #   and *_formclaims keys
  def self.isolate_forms_differences(current_content, parent_content)
    added_forms = []
    removed_forms = []
    changed_forms = []
    added_representations = []
    removed_representations = []
    changed_representations = []
    added_formclaims = []
    removed_formclaims = []
    changed_formclaims = []

    current_forms = forms_of(current_content)
    parent_forms = forms_of(parent_content)

    current_forms.each_with_index do |current_form, index|
      parent_form = parent_forms[index]

      if parent_form.nil?
        # Form was added
        added_forms << { index: index }
      elsif current_form && current_form != parent_form
        # Form was changed (identical forms are not reported)
        changed_forms << { index: index }
      end

      representations = RepresentationAnalyzer.isolate_representation_differences(current_form, parent_form)
      added_representations += representations[:added]
      removed_representations += representations[:removed]
      changed_representations += representations[:changed]

      formclaims = InsideClaimAnalyzer.isolate_inside_claim_differences(current_form, parent_form)
      added_formclaims += formclaims[:added]
      removed_formclaims += formclaims[:removed]
      changed_formclaims += formclaims[:changed]
    end

    parent_forms.each_with_index do |parent_form, index|
      next unless current_forms[index].nil?

      # Form was removed, so everything nested inside it was removed as well
      removed_forms << { index: index }

      representations = RepresentationAnalyzer.isolate_representation_differences(nil, parent_form)
      removed_representations += representations[:removed]

      formclaims = InsideClaimAnalyzer.isolate_inside_claim_differences(nil, parent_form)
      removed_formclaims += formclaims[:removed]
    end

    {
      added_forms: added_forms,
      removed_forms: removed_forms,
      changed_forms: changed_forms,
      added_representations: added_representations,
      removed_representations: removed_representations,
      changed_representations: changed_representations,
      added_formclaims: added_formclaims,
      removed_formclaims: removed_formclaims,
      changed_formclaims: changed_formclaims
    }
  end

  # Returns the "forms" Array of +content+, or [] when the content is nil or
  # the entry is missing / not an Array.
  def self.forms_of(content)
    forms = content&.fetch("forms", nil)
    forms.is_a?(Array) ? forms : []
  end
  private_class_method :forms_of
end
@@ -0,0 +1,71 @@
1
# Compares the 'glosses' entry of two revision content hashes.
class GlossAnalyzer
  # Reports, per language key, which glosses were added, removed or changed
  # between a parent revision and the current revision.
  #
  # @param current_content [Hash, nil] content of the current revision
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current gloss is "added"
  # @return [Hash] :changed, :removed and :added, each an Array of
  #   `{ lang: <language key> }`
  def self.isolate_gloss_differences(current_content, parent_content)
    current_glosses = glosses_of(current_content)
    parent_glosses = glosses_of(parent_content)

    added = []
    changed = []
    current_glosses.each do |lang, current_gloss|
      parent_gloss = parent_glosses[lang]
      if parent_gloss.nil?
        added << { lang: lang }
      elsif current_gloss != parent_gloss
        changed << { lang: lang }
      end
    end

    # Languages the parent had that the current revision no longer has
    removed = parent_glosses.keys
                            .select { |lang| current_glosses[lang].nil? }
                            .map { |lang| { lang: lang } }

    {
      changed: changed,
      removed: removed,
      added: added
    }
  end

  # Returns the glosses Hash of +content+, or {} when the content is nil or
  # the entry is missing / not a Hash (an Array is treated as "no glosses").
  def self.glosses_of(content)
    glosses = content && content['glosses']
    glosses.is_a?(Hash) ? glosses : {}
  end
  private_class_method :glosses_of
end
@@ -0,0 +1,84 @@
1
# Compares the "claims" entry of two content hashes. Claims are grouped under
# a claim key, each key holding an ordered list that is compared by position.
class InsideClaimAnalyzer
  # @param current_content [Hash, nil] current side; nil means "no claims"
  # @param parent_content [Hash, nil] parent side; nil means "no claims", so
  #   every current claim is reported as added
  # @return [Hash] :added, :removed and :changed, each an Array of
  #   `{ key: <claim key>, index: <position> }`
  def self.isolate_inside_claim_differences(current_content, parent_content)
    added = []
    removed = []
    changed = []

    current_claims_by_key = claims_of(current_content)
    parent_claims_by_key = claims_of(parent_content)

    current_claims_by_key.each do |claim_key, current_claims|
      # A key the parent does not have behaves like an empty parent list:
      # every current claim under it is then reported as added.
      parent_claims = parent_claims_by_key.fetch(claim_key, [])

      current_claims.each_with_index do |current_claim, index|
        parent_claim = parent_claims[index]
        if parent_claim.nil?
          added << { key: claim_key, index: index }
        elsif current_claim != parent_claim
          changed << { key: claim_key, index: index }
        end
      end

      # Positions the parent had that the current list no longer reaches
      parent_claims.each_index do |index|
        removed << { key: claim_key, index: index } if current_claims[index].nil?
      end
    end

    # Keys that exist only in the parent: all of their claims were removed
    parent_claims_by_key.each do |claim_key, parent_claims|
      next if current_claims_by_key.key?(claim_key)

      parent_claims.each_index do |index|
        removed << { key: claim_key, index: index }
      end
    end

    {
      added: added,
      removed: removed,
      changed: changed
    }
  end

  # Returns the "claims" Hash of +content+, or {} when the content is nil or
  # the entry is missing / not a Hash.
  def self.claims_of(content)
    claims = content.nil? ? nil : content["claims"]
    claims.is_a?(Hash) ? claims : {}
  end
  private_class_method :claims_of
end
@@ -1,53 +1,75 @@
1
# Compares the 'labels' entry of two revision content hashes.
class LabelAnalyzer
  # Reports, per language key, which labels were added, removed or changed
  # between a parent revision and the current revision.
  #
  # @param current_content [Hash, nil] content of the current revision
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current label is "added"
  # @return [Hash] :changed_labels, :removed_labels and :added_labels, each an
  #   Array of `{ lang: <language key> }`
  def self.isolate_labels_differences(current_content, parent_content)
    current_labels = labels_of(current_content)
    parent_labels = labels_of(parent_content)

    added_labels = []
    changed_labels = []
    current_labels.each do |lang, current_label|
      parent_label = parent_labels[lang]
      if parent_label.nil?
        added_labels << { lang: lang }
      elsif current_label != parent_label
        changed_labels << { lang: lang }
      end
    end

    # A language is removed when the parent has it and the current revision
    # does not, even if other languages are still present.
    removed_labels = parent_labels.keys
                                  .select { |lang| current_labels[lang].nil? }
                                  .map { |lang| { lang: lang } }

    {
      changed_labels: changed_labels,
      removed_labels: removed_labels,
      added_labels: added_labels
    }
  end

  # Returns the labels Hash of +content+, or {} when the content is nil or the
  # entry is missing / not a Hash (an Array is treated as "no labels").
  def self.labels_of(content)
    labels = content && content['labels']
    labels.is_a?(Hash) ? labels : {}
  end
  private_class_method :labels_of
end
@@ -5,35 +5,49 @@ class LargeBatchesAnalyzer
5
5
  def self.handle_large_batches(revision_ids, batch_size)
6
6
  revision_contents = {}
7
7
  parent_contents = {}
8
-
9
-
10
- revision_ids_batches = revision_ids.each_slice(batch_size).to_a
11
- puts "Handling revision_ids_batches: #{revision_ids_batches.length}"
12
- revision_ids_batches.each do |batch|
13
- parsed_contents = Api.get_revision_contents(batch)
14
- if parsed_contents
15
- parent_ids = []
16
- revision_contents.merge!(parsed_contents) if parsed_contents
17
- parsed_contents.values.each do |data|
18
- parent_id = data[:parentid]
19
-
20
- if parent_id != 0 && !parent_id.nil?
21
- parent_ids << parent_id
22
- end
23
- end
24
- parent_contents_batch = Api.get_revision_contents(parent_ids)
25
- parent_contents.merge!(parent_contents_batch) if parent_contents_batch
8
+ first_revisions = []
9
+
10
+ revision_ids.each_slice(batch_size) do |batch|
11
+ parent_ids = []
12
+ parsed_contents = Api.get_revision_contents(batch)
13
+ next unless parsed_contents
14
+
15
+ # I have to check if any of the revision ids in the parsed content has parentid == 0
16
+ parsed_contents.each do |revid, data|
17
+ if data[:parentid] == 0
18
+ first_revisions << revid
19
+ else
20
+ parent_ids << data[:parentid]
26
21
  end
22
+ end
23
+ revision_contents.merge!(parsed_contents)
24
+ parent_contents_batch = Api.get_revision_contents(parent_ids)
25
+ parent_contents.merge!(parent_contents_batch) if parent_contents_batch
27
26
  end
28
-
27
+
28
+ build_result(revision_contents, parent_contents, first_revisions)
29
+ end
30
+
31
+ def self.build_result(revision_contents, parent_contents, first_revisions)
29
32
  result = {}
30
33
  revision_contents.each do |revid, data|
31
- parentid = data[:parentid]
32
- parent_content = parent_contents[parentid] if parentid
33
- current = data ? data[:content] : nil
34
- parent = parent_content ? parent_content[:content] : nil
35
- result[revid] = { current_content: current, parent_content: parent }
34
+ parent_content = parent_contents[data[:parentid]]
35
+ result[revid] = {
36
+ current_content: data&.fetch(:content, nil),
37
+ parent_content: parent_content&.fetch(:content, nil),
38
+ comment: data&.fetch(:comment, nil),
39
+ model: data&.fetch(:model, nil)
40
+ }
41
+ end
42
+ first_revisions.each do |revid|
43
+ result[revid] = {
44
+ current_content: revision_contents[revid]&.fetch(:content, nil),
45
+ parent_content: nil,
46
+ comment: revision_contents[revid]&.fetch(:comment, nil),
47
+ model: revision_contents[revid]&.fetch(:model, nil)
48
+ }
36
49
  end
37
50
  result
38
- end
51
+ end
52
+
39
53
  end
@@ -0,0 +1,70 @@
1
# Compares the 'lemmas' entry of two revision content hashes.
class LemmaAnalyzer
  # Reports, per language key, which lemmas were added, removed or changed
  # between a parent revision and the current revision.
  #
  # @param current_content [Hash, nil] content of the current revision
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current lemma is "added"
  # @return [Hash] :changed_lemmas, :removed_lemmas and :added_lemmas, each an
  #   Array of `{ lang: <language key> }`
  def self.isolate_lemmas_differences(current_content, parent_content)
    current_lemmas = lemmas_of(current_content)
    parent_lemmas = lemmas_of(parent_content)

    added_lemmas = []
    changed_lemmas = []
    current_lemmas.each do |lang, current_lemma|
      parent_lemma = parent_lemmas[lang]
      if parent_lemma.nil?
        added_lemmas << { lang: lang }
      elsif current_lemma != parent_lemma
        changed_lemmas << { lang: lang }
      end
    end

    # Languages the parent had that the current revision no longer has
    removed_lemmas = parent_lemmas.keys
                                  .select { |lang| current_lemmas[lang].nil? }
                                  .map { |lang| { lang: lang } }

    {
      changed_lemmas: changed_lemmas,
      removed_lemmas: removed_lemmas,
      added_lemmas: added_lemmas
    }
  end

  # Returns the lemmas Hash of +content+, or {} when the content is nil or the
  # entry is missing / not a Hash (an Array is treated as "no lemmas").
  def self.lemmas_of(content)
    lemmas = content && content['lemmas']
    lemmas.is_a?(Hash) ? lemmas : {}
  end
  private_class_method :lemmas_of
end
@@ -0,0 +1,83 @@
1
# Collects differences between the "qualifiers" of claims. Qualifiers are
# grouped under a qualifier key, each key holding an ordered list of values.
class QualifierAnalyzer
  # Appends one entry per qualifier value of +claim+ to +updated_qualifiers+.
  # Used when a whole claim was added or removed, so all of its qualifiers
  # share that fate.
  #
  # @param claim [Hash] claim that may carry a "qualifiers" Hash
  # @param updated_qualifiers [Array] accumulator, mutated in place
  # @param claim_key [Object] key of the claim the qualifiers belong to
  # @param claim_index [Integer] position of the claim under that key
  # @return [Array] the same +updated_qualifiers+ accumulator
  def self.qualifier_updates(claim, updated_qualifiers, claim_key, claim_index)
    qualifiers = claim["qualifiers"]
    return updated_qualifiers unless qualifiers

    qualifiers.each do |qualifier_key, qualifier_values|
      qualifier_values.each_index do |qualifier_index|
        updated_qualifiers << {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
      end
    end
    updated_qualifiers
  end

  # Compares the qualifiers of a claim present in both revisions, position by
  # position, and appends the differences to the given accumulators.
  #
  # A value with no counterpart at the same key and position in the parent is
  # added; a value that differs from its counterpart is changed; a parent
  # value with no counterpart in the current claim is removed.
  #
  # @param current_claim [Hash] claim in the current revision
  # @param parent_claim [Hash] the same claim in the parent revision
  # @param changed_qualifiers [Array] accumulator, mutated in place
  # @param added_qualifiers [Array] accumulator, mutated in place
  # @param removed_qualifiers [Array] accumulator, mutated in place
  # @param claim_key [Object] key of the claim the qualifiers belong to
  # @param claim_index [Integer] position of the claim under that key
  # @return [Hash] :added_qualifiers, :removed_qualifiers and
  #   :changed_qualifiers, pointing at the accumulators passed in
  def self.handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, claim_index)
    current_qualifiers = current_claim["qualifiers"] || {}
    parent_qualifiers = parent_claim["qualifiers"] || {}

    current_qualifiers.each do |qualifier_key, qualifier_values|
      parent_values = parent_qualifiers[qualifier_key] || []
      qualifier_values.each_with_index do |qualifier_value, qualifier_index|
        parent_value = parent_values[qualifier_index]
        entry = {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
        if parent_value.nil?
          # No counterpart in the parent (new key or new position): added
          added_qualifiers << entry
        elsif parent_value != qualifier_value
          changed_qualifiers << entry
        end
      end
    end

    parent_qualifiers.each do |qualifier_key, qualifier_values|
      current_values = current_qualifiers[qualifier_key] || []
      qualifier_values.each_index do |qualifier_index|
        next unless current_values[qualifier_index].nil?

        # No counterpart in the current claim: removed
        removed_qualifiers << {
          claim_key: claim_key,
          claim_index: claim_index,
          qualifier_key: qualifier_key,
          qualifier_index: qualifier_index
        }
      end
    end

    {
      added_qualifiers: added_qualifiers,
      removed_qualifiers: removed_qualifiers,
      changed_qualifiers: changed_qualifiers
    }
  end
end