wikidata-diff-analyzer 0.1.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,84 @@
1
# Positional diff of the "claims" section between a revision and its parent.
class InsideClaimAnalyzer
  # Reports which claims were added, removed or changed between two revisions.
  #
  # Claims are matched by claim key and by position inside that key's list;
  # no attempt is made to pair up claims that moved to a different index.
  #
  # @param current_content [Hash, nil] content of the revision being analysed
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current claim is reported as added
  # @return [Hash{Symbol => Array<Hash>}] :added, :removed and :changed lists,
  #   each holding { key: <claim key>, index: <position> } entries
  def self.isolate_inside_claim_differences(current_content, parent_content)
    current_by_key = claims_of(current_content)
    parent_by_key = claims_of(parent_content)

    added = []
    removed = []
    changed = []

    current_by_key.each do |claim_key, current_claims|
      # A key the parent does not have behaves like an empty list: everything
      # under it is new and nothing under it can have been removed.
      parent_claims = parent_by_key.fetch(claim_key, [])

      current_claims.each_with_index do |current_claim, index|
        parent_claim = parent_claims[index]
        if parent_claim.nil?
          added << { key: claim_key, index: index }
        elsif current_claim != parent_claim
          changed << { key: claim_key, index: index }
        end
      end

      # Positions that exist in the parent but no longer in the current list.
      parent_claims.each_index do |index|
        removed << { key: claim_key, index: index } if current_claims[index].nil?
      end
    end

    # Keys that disappeared entirely: every claim under them was removed.
    parent_by_key.each do |claim_key, parent_claims|
      next if current_by_key.key?(claim_key)

      parent_claims.each_index { |index| removed << { key: claim_key, index: index } }
    end

    { added: added, removed: removed, changed: changed }
  end

  # Returns content["claims"] when it is a Hash, otherwise an empty Hash
  # (covers nil content, a missing key and any non-Hash value).
  def self.claims_of(content)
    claims = content && content["claims"]
    claims.is_a?(Hash) ? claims : {}
  end
  private_class_method :claims_of
end
@@ -1,53 +1,75 @@
1
# Per-language diff of the "labels" section between a revision and its parent.
class LabelAnalyzer
  # Reports which label languages were added, removed or changed.
  #
  # A language is "added" when the parent has no label for it, "changed" when
  # both revisions have one and the values differ, and "removed" when the
  # parent has one that the current revision lacks. Removal is checked per
  # language, so dropping a single label is reported even when other labels
  # remain.
  #
  # @param current_content [Hash, nil] content of the revision being analysed
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current label is reported as added
  # @return [Hash{Symbol => Array<Hash>}] :changed_labels, :removed_labels and
  #   :added_labels lists of { lang: <language key> } entries
  def self.isolate_labels_differences(current_content, parent_content)
    current_labels = labels_of(current_content)
    parent_labels = labels_of(parent_content)

    added_labels = []
    changed_labels = []
    current_labels.each do |lang, current_label|
      parent_label = parent_labels[lang]
      if parent_label.nil?
        added_labels << { lang: lang }
      elsif current_label != parent_label
        changed_labels << { lang: lang }
      end
    end

    removed_labels = parent_labels.keys
                                  .select { |lang| current_labels[lang].nil? }
                                  .map { |lang| { lang: lang } }

    {
      changed_labels: changed_labels,
      removed_labels: removed_labels,
      added_labels: added_labels
    }
  end

  # Returns content['labels'] when it is a Hash, otherwise an empty Hash.
  # This covers nil content, a missing key and an Array value (presumably how
  # an empty label map is serialized upstream -- confirm against the API).
  def self.labels_of(content)
    labels = content && content['labels']
    labels.is_a?(Hash) ? labels : {}
  end
  private_class_method :labels_of
end
@@ -5,34 +5,49 @@ class LargeBatchesAnalyzer
5
5
  def self.handle_large_batches(revision_ids, batch_size)
6
6
  revision_contents = {}
7
7
  parent_contents = {}
8
-
9
-
10
- revision_ids_batches = revision_ids.each_slice(batch_size).to_a
11
- revision_ids_batches.each do |batch|
12
- parsed_contents = Api.get_revision_contents(batch)
13
- if parsed_contents
14
- parent_ids = []
15
- revision_contents.merge!(parsed_contents) if parsed_contents
16
- parsed_contents.values.each do |data|
17
- parent_id = data[:parentid]
18
-
19
- if parent_id != 0 && !parent_id.nil?
20
- parent_ids << parent_id
21
- end
22
- end
23
- parent_contents_batch = Api.get_revision_contents(parent_ids)
24
- parent_contents.merge!(parent_contents_batch) if parent_contents_batch
8
+ first_revisions = []
9
+
10
+ revision_ids.each_slice(batch_size) do |batch|
11
+ parent_ids = []
12
+ parsed_contents = Api.get_revision_contents(batch)
13
+ next unless parsed_contents
14
+
15
+ # I have to check if any of the revision ids in the parsed content has parentid == 0
16
+ parsed_contents.each do |revid, data|
17
+ if data[:parentid] == 0
18
+ first_revisions << revid
19
+ else
20
+ parent_ids << data[:parentid]
25
21
  end
22
+ end
23
+ revision_contents.merge!(parsed_contents)
24
+ parent_contents_batch = Api.get_revision_contents(parent_ids)
25
+ parent_contents.merge!(parent_contents_batch) if parent_contents_batch
26
26
  end
27
-
27
+
28
+ build_result(revision_contents, parent_contents, first_revisions)
29
+ end
30
+
31
# Builds the per-revision result map from the fetched contents.
#
# @param revision_contents [Hash] revid => data, where data exposes :parentid,
#   :content, :comment and :model (a nil data yields an all-nil entry)
# @param parent_contents [Hash] revid => data for the parent revisions
# @param first_revisions [Array] revids that have no parent; their
#   :parent_content is always nil
# @return [Hash] revid => { current_content:, parent_content:, comment:, model: }
def self.build_result(revision_contents, parent_contents, first_revisions)
  # Single place that shapes an entry, shared by both loops below.
  entry_for = lambda do |data, parent|
    fields = data || {}
    {
      current_content: fields[:content],
      parent_content: parent && parent[:content],
      comment: fields[:comment],
      model: fields[:model]
    }
  end

  result = {}
  revision_contents.each do |revid, data|
    # Guard the lookup too: a nil data has no :parentid to read.
    parent = data && parent_contents[data[:parentid]]
    result[revid] = entry_for.call(data, parent)
  end

  # First revisions never carry a parent, whatever parent_contents holds.
  first_revisions.each do |revid|
    result[revid] = entry_for.call(revision_contents[revid], nil)
  end

  result
end
52
+
38
53
  end
@@ -0,0 +1,70 @@
1
# Per-language diff of the "lemmas" section between a revision and its parent.
class LemmaAnalyzer
  # Reports which lemma languages were added, removed or changed.
  #
  # A language is "added" when the parent has no lemma for it, "changed" when
  # both revisions have one and the values differ, and "removed" when the
  # parent has one that the current revision lacks.
  #
  # @param current_content [Hash, nil] content of the revision being analysed
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current lemma is reported as added
  # @return [Hash{Symbol => Array<Hash>}] :changed_lemmas, :removed_lemmas and
  #   :added_lemmas lists of { lang: <language key> } entries
  def self.isolate_lemmas_differences(current_content, parent_content)
    current_lemmas = lemmas_of(current_content)
    parent_lemmas = lemmas_of(parent_content)

    added_lemmas = []
    changed_lemmas = []
    current_lemmas.each do |lang, current_lemma|
      parent_lemma = parent_lemmas[lang]
      if parent_lemma.nil?
        added_lemmas << { lang: lang }
      elsif current_lemma != parent_lemma
        changed_lemmas << { lang: lang }
      end
    end

    removed_lemmas = parent_lemmas.keys
                                  .select { |lang| current_lemmas[lang].nil? }
                                  .map { |lang| { lang: lang } }

    {
      changed_lemmas: changed_lemmas,
      removed_lemmas: removed_lemmas,
      added_lemmas: added_lemmas
    }
  end

  # Returns content['lemmas'] when it is a Hash, otherwise an empty Hash.
  # Anything else (nil content, missing key, Array, unexpected scalar) is
  # treated as "no lemmas" instead of raising while iterating.
  def self.lemmas_of(content)
    lemmas = content && content['lemmas']
    lemmas.is_a?(Hash) ? lemmas : {}
  end
  private_class_method :lemmas_of
end
@@ -0,0 +1,83 @@
1
# Helpers that diff the "qualifiers" of a single claim between two revisions.
# Qualifier values are matched by qualifier key and by position in that key's list.
class QualifierAnalyzer
  # Appends one entry per qualifier value of +claim+ to +updated_qualifiers+.
  # Used when a whole claim was added or removed, so all of its qualifiers
  # share that fate.
  #
  # @param claim [Hash] claim whose "qualifiers" (key => list) are enumerated
  # @param updated_qualifiers [Array<Hash>] accumulator, mutated in place
  # @param claim_key [Object] key of the claim, copied into each entry
  # @param claim_index [Integer] position of the claim, copied into each entry
  # @return [Array<Hash>] the same +updated_qualifiers+ array
  def self.qualifier_updates(claim, updated_qualifiers, claim_key, claim_index)
    (claim["qualifiers"] || {}).each do |qualifier_key, qualifier_values|
      qualifier_values.each_index do |qualifier_index|
        updated_qualifiers << qualifier_entry(claim_key, claim_index, qualifier_key, qualifier_index)
      end
    end
    updated_qualifiers
  end

  # Classifies every qualifier value of a claim present in both revisions.
  #
  # A value is "added" when the parent has nothing at the same key and index
  # (including a value appended to an existing key), "changed" when the parent
  # has a different value there, and "removed" when the parent has a value at a
  # key/index the current claim lacks.
  #
  # @param current_claim [Hash] claim in the revision being analysed
  # @param parent_claim [Hash] the same claim in the parent revision
  # @param changed_qualifiers [Array<Hash>] accumulator, mutated in place
  # @param added_qualifiers [Array<Hash>] accumulator, mutated in place
  # @param removed_qualifiers [Array<Hash>] accumulator, mutated in place
  # @param claim_key [Object] key of the claim, copied into each entry
  # @param claim_index [Integer] position of the claim, copied into each entry
  # @return [Hash{Symbol => Array<Hash>}] the three accumulators under
  #   :added_qualifiers, :removed_qualifiers and :changed_qualifiers
  def self.handle_changed_qualifiers(current_claim, parent_claim, changed_qualifiers, added_qualifiers, removed_qualifiers, claim_key, claim_index)
    current_qualifiers = current_claim["qualifiers"] || {}
    parent_qualifiers = parent_claim["qualifiers"] || {}

    current_qualifiers.each do |qualifier_key, qualifier_values|
      parent_values = parent_qualifiers[qualifier_key]
      qualifier_values.each_with_index do |qualifier_value, qualifier_index|
        parent_value = parent_values && parent_values[qualifier_index]
        entry = qualifier_entry(claim_key, claim_index, qualifier_key, qualifier_index)
        if parent_value.nil?
          # Nothing at this key/index in the parent: new, not modified.
          added_qualifiers << entry
        elsif parent_value != qualifier_value
          changed_qualifiers << entry
        end
      end
    end

    parent_qualifiers.each do |qualifier_key, qualifier_values|
      current_values = current_qualifiers[qualifier_key]
      qualifier_values.each_index do |qualifier_index|
        next unless current_values.nil? || current_values[qualifier_index].nil?

        removed_qualifiers << qualifier_entry(claim_key, claim_index, qualifier_key, qualifier_index)
      end
    end

    {
      added_qualifiers: added_qualifiers,
      removed_qualifiers: removed_qualifiers,
      changed_qualifiers: changed_qualifiers
    }
  end

  # Builds the location record shared by all three result lists.
  def self.qualifier_entry(claim_key, claim_index, qualifier_key, qualifier_index)
    {
      claim_key: claim_key,
      claim_index: claim_index,
      qualifier_key: qualifier_key,
      qualifier_index: qualifier_index
    }
  end
  private_class_method :qualifier_entry
end
@@ -0,0 +1,49 @@
1
# Helpers that diff the "references" of a single claim between two revisions.
class ReferenceAnalyzer
  # Appends one entry per reference of +claim+ to +updated_references+.
  # Used when a whole claim was added or removed.
  #
  # @param claim [Hash] claim whose "references" list is enumerated
  # @param updated_references [Array<Hash>] accumulator, mutated in place
  # @param claim_key [Object] key of the claim, copied into each entry
  # @param claim_index [Integer] position of the claim, copied into each entry
  # @return [Array<Hash>] the same +updated_references+ array
  def self.reference_updates(claim, updated_references, claim_key, claim_index)
    (claim["references"] || []).each_index do |ref_index|
      updated_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end
    updated_references
  end

  # Classifies the references of a claim that exists in both revisions.
  #
  # References are compared by value: a current reference with no identical
  # counterpart in the parent is "added", and a parent reference with no
  # identical counterpart in the current claim is "removed". An edited
  # reference therefore shows up as one addition plus one removal. A reference
  # that has an identical counterpart is unchanged and is never reported, so
  # this method does not append to +changed_references+.
  #
  # @param current_claim [Hash] claim in the revision being analysed
  # @param parent_claim [Hash] the same claim in the parent revision
  # @param changed_references [Array<Hash>] accumulator, returned untouched
  # @param added_references [Array<Hash>] accumulator, mutated in place
  # @param removed_references [Array<Hash>] accumulator, mutated in place
  # @param claim_key [Object] key of the claim, copied into each entry
  # @param claim_index [Integer] position of the claim, copied into each entry
  # @return [Hash{Symbol => Array<Hash>}] the three accumulators under
  #   :added_references, :removed_references and :changed_references
  def self.handle_changed_references(current_claim, parent_claim, changed_references, added_references, removed_references, claim_key, claim_index)
    current_references = current_claim["references"] || []
    parent_references = parent_claim["references"] || []

    current_references.each_with_index do |current_ref, ref_index|
      next if parent_references.include?(current_ref)

      added_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end

    parent_references.each_with_index do |parent_ref, ref_index|
      next if current_references.include?(parent_ref)

      removed_references << { claim_key: claim_key, claim_index: claim_index, reference_index: ref_index }
    end

    {
      added_references: added_references,
      removed_references: removed_references,
      changed_references: changed_references
    }
  end

  # Tells whether +current_reference+ differs from every parent reference.
  #
  # @param current_reference [Hash] reference exposing "snaks"
  # @param parent_references [Array<Hash>] references of the parent claim
  # @return [Boolean] true when the parent has references but none of them has
  #   the same "snaks"; false when one matches or when the list is empty
  def self.ref_modified?(current_reference, parent_references)
    return false if parent_references.empty?

    parent_references.none? { |parent_reference| parent_reference["snaks"] == current_reference["snaks"] }
  end
end
@@ -0,0 +1,71 @@
1
# Per-language diff of the "representations" section between a revision and
# its parent.
class RepresentationAnalyzer
  # Reports which representation languages were added, removed or changed.
  #
  # A language is "added" when the parent has no representation for it,
  # "changed" when both revisions have one and the values differ, and
  # "removed" when the parent has one that the current revision lacks.
  #
  # @param current_content [Hash, nil] content of the revision being analysed
  # @param parent_content [Hash, nil] content of the parent revision; nil when
  #   there is no parent, in which case every current representation is
  #   reported as added
  # @return [Hash{Symbol => Array<Hash>}] :changed, :removed and :added lists
  #   of { lang: <language key> } entries
  def self.isolate_representation_differences(current_content, parent_content)
    current_representations = representations_of(current_content)
    parent_representations = representations_of(parent_content)

    added = []
    changed = []
    current_representations.each do |lang, current_representation|
      parent_representation = parent_representations[lang]
      if parent_representation.nil?
        added << { lang: lang }
      elsif current_representation != parent_representation
        changed << { lang: lang }
      end
    end

    removed = parent_representations.keys
                                    .select { |lang| current_representations[lang].nil? }
                                    .map { |lang| { lang: lang } }

    { changed: changed, removed: removed, added: added }
  end

  # Returns content['representations'] when it is a Hash, otherwise an empty
  # Hash. Anything else (nil content, missing key, Array, unexpected scalar)
  # is treated as "no representations" instead of raising while iterating.
  def self.representations_of(content)
    representations = content && content['representations']
    representations.is_a?(Hash) ? representations : {}
  end
  private_class_method :representations_of
end