family-reunion 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.1.3
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{family-reunion}
8
- s.version = "0.1.2"
8
+ s.version = "0.1.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Dmitry Mozzherin", "David Shorthouse"]
12
- s.date = %q{2011-06-07}
12
+ s.date = %q{2011-06-14}
13
13
  s.description = %q{An algorithm to merge related nodes of two taxonomic hierarchies with synonym information}
14
14
  s.email = %q{dmozzherin@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -1,3 +1,4 @@
1
+ require 'logger'
1
2
  require 'json'
2
3
  require 'taxamatch_rb'
3
4
  require 'family-reunion/cache'
@@ -15,6 +16,24 @@ class FamilyReunion
15
16
  attr :primary_valid_names_set, :secondary_valid_names_set
16
17
  attr :primary_synonyms_set, :secondary_synonyms_set
17
18
 
19
+ VERSION = open(File.join(File.dirname(__FILE__), '..', 'VERSION')).readline.strip
20
+
21
+ def self.logger
22
+ @@logger ||= Logger.new(nil)
23
+ end
24
+
25
+ def self.logger=(logger)
26
+ @@logger = logger
27
+ end
28
+
29
+ def self.logger_reset
30
+ self.logger = Logger.new(nil)
31
+ end
32
+
33
+ def self.logger_write(obj_id, message, method = :info)
34
+ self.logger.send(method, "|%s|%s|" % [obj_id, message])
35
+ end
36
+
18
37
  def initialize(primary_node, secondary_node)
19
38
  @primary_node = FamilyReunion::TopNode.new(primary_node)
20
39
  @secondary_node = FamilyReunion::TopNode.new(secondary_node)
@@ -29,20 +48,24 @@ class FamilyReunion
29
48
  merge_exact_matches
30
49
  merge_fuzzy_matches if with_fuzzy_matching
31
50
  merge_no_matches
51
+ FamilyReunion.logger_write(self.object_id, "Merging is complete")
32
52
  @merges
33
53
  end
34
54
 
35
55
  private
36
56
 
37
57
  def merge_exact_matches
58
+ FamilyReunion.logger_write(self.object_id, "Started merging of exact matches")
38
59
  ExactMatcher.new(self).merge
39
60
  end
40
61
 
41
62
  def merge_fuzzy_matches
63
+ FamilyReunion.logger_write(self.object_id, "Started merging of fuzzy matches")
42
64
  FuzzyMatcher.new(self).merge
43
65
  end
44
-
66
+
45
67
  def merge_no_matches
68
+ FamilyReunion.logger_write(self.object_id, "Started gap filling, adding new species and uninomials")
46
69
  NomatchOrganizer.new(self).merge
47
70
  end
48
71
 
@@ -7,9 +7,13 @@ class FamilyReunion
7
7
  end
8
8
 
9
9
  def merge
10
+ FamilyReunion.logger_write(@fr.object_id, "Merging exact matches of accepted names")
10
11
  add_valid_matches(get_valid_matches)
12
+ FamilyReunion.logger_write(@fr.object_id, "Merging exact matches of accepted names to synonyms")
11
13
  add_synonym_matches(get_valid_to_synonym_matches, :valid_to_synonym)
14
+ FamilyReunion.logger_write(@fr.object_id, "Merging exact matches of synonyms to accepted names")
12
15
  add_synonym_matches(get_synonym_to_valid_matches, :synonym_to_valid)
16
+ FamilyReunion.logger_write(@fr.object_id, "Merging exact matches of synonyms")
13
17
  add_synonym_matches(get_synonym_to_synonym_matches, :synonym_to_synonym)
14
18
  end
15
19
 
@@ -23,9 +27,9 @@ class FamilyReunion
23
27
  # Homonyms are treated separately, and are not matched by the algorithm,
24
28
  # they are excluded from valid_matches
25
29
  valid_matches.each do |name|
26
- primary_id = @fr.primary_node.valid_names_hash[name][:id]
27
- secondary_id = @fr.secondary_node.valid_names_hash[name][:id]
28
- @fr.merges[primary_id] = {:matches => {secondary_id.to_s => {:match_type => :valid_to_valid}}, :nonmatches => []}
30
+ primary_id = @fr.primary_node.valid_names_hash[name][:id].to_s.to_sym
31
+ secondary_id = @fr.secondary_node.valid_names_hash[name][:id].to_s.to_sym
32
+ @fr.merges[primary_id] = {:matches => {secondary_id => {:match_type => :valid_to_valid}}, :nonmatches => []}
29
33
  end
30
34
  end
31
35
 
@@ -36,7 +40,7 @@ class FamilyReunion
36
40
  def get_synonym_to_valid_matches
37
41
  @fr.primary_synonyms_set & @fr.secondary_valid_names_set
38
42
  end
39
-
43
+
40
44
  def get_synonym_to_synonym_matches
41
45
  @fr.primary_synonyms_set & @fr.secondary_synonyms_set
42
46
  end
@@ -61,9 +65,9 @@ class FamilyReunion
61
65
  valid_names = node.valid_names_hash
62
66
  synonyms = node.synonyms_hash
63
67
  if valid_names.has_key?(name)
64
- return [valid_names[name][:id]]
68
+ return [valid_names[name][:id].to_s.to_sym]
65
69
  else
66
- return synonyms[name].map {|n| n[:id]}
70
+ return synonyms[name].map {|n| n[:id].to_s.to_sym}
67
71
  end
68
72
  end
69
73
 
@@ -8,9 +8,13 @@ class FamilyReunion
8
8
  end
9
9
 
10
10
  def merge
11
+ FamilyReunion.logger_write(@fr.object_id, "Merging fuzzy matches of accepted names")
11
12
  add_matches(get_valid_matches, :fuzzy_valid_to_valid)
13
+ FamilyReunion.logger_write(@fr.object_id, "Merging fuzzy matches of accepted names to synonyms")
12
14
  add_matches(get_valid_to_synonym_matches, :fuzzy_valid_to_synonym)
15
+ FamilyReunion.logger_write(@fr.object_id, "Merging fuzzy matches of synonyms to accepted names")
13
16
  add_matches(get_synonym_to_valid_matches, :fuzzy_synonym_to_valid)
17
+ FamilyReunion.logger_write(@fr.object_id, "Merging fuzzy matches of synonyms")
14
18
  add_matches(get_synonym_to_synonym_matches, :fuzzy_synonym_to_synonym)
15
19
  end
16
20
 
@@ -42,8 +46,8 @@ class FamilyReunion
42
46
 
43
47
  def add_matches(matched_nodes, match_type)
44
48
  matched_nodes.each do |primary_node, secondary_nodes|
45
- primary_id = primary_node[:id]
46
- secondary_ids = secondary_nodes.map { |n| n[:id] }
49
+ primary_id = primary_node[:id].to_s.to_sym
50
+ secondary_ids = secondary_nodes.map { |n| n[:id].to_s.to_sym }
47
51
  secondary_id_matches = format_secondary_id_for_merge(secondary_ids, match_type)
48
52
  add_record_to_merges(primary_id, secondary_id_matches)
49
53
  end
@@ -3,13 +3,14 @@ class FamilyReunion
3
3
  private
4
4
  def format_secondary_id_for_merge(secondary_ids, match_type)
5
5
  secondary_ids.inject({}) do |res, i|
6
- i = i.to_s
6
+ raise "Secondary id is not a symbol" unless i.is_a?(Symbol)
7
7
  res[i] = {:match_type => match_type} unless res.has_key?(i)
8
8
  res
9
9
  end
10
10
  end
11
11
 
12
12
  def add_record_to_merges(primary_id, secondary_id_matches)
13
+ raise "Primary id is not a symbol" unless primary_id.is_a?(Symbol)
13
14
  if @fr.merges.has_key?(primary_id)
14
15
  secondary_id_matches.each do |key, val|
15
16
  @fr.merges[primary_id][:matches][key] = val unless @fr.merges[primary_id][:matches].has_key?(key)
@@ -7,6 +7,7 @@ class FamilyReunion
7
7
  end
8
8
 
9
9
  def merge
10
+ FamilyReunion.logger_write(@fr.object_id, "Filling gaps with new taxa")
10
11
  organize_nonmatches(get_nomach_secondary_ids)
11
12
  end
12
13
 
@@ -76,7 +76,7 @@ class FamilyReunion
76
76
 
77
77
  def update_paths_hash(node)
78
78
  path = node[:path].map { |n| n.to_sym }
79
- path_ids = node[:path_ids].map { |i| i.to_sym }
79
+ path_ids = node[:path_ids].map { |i| i.to_s.to_sym }
80
80
  until path.empty?
81
81
  populate_paths_hash(path, path_ids)
82
82
  path.pop
@@ -6,6 +6,7 @@ describe FamilyReunion do
6
6
  end
7
7
 
8
8
  it "should generate instances of nodes" do
9
+ FamilyReunion.logger = Logger.new($stdout)
9
10
  @fr.primary_node.is_a?(FamilyReunion::TopNode).should be_true
10
11
  @fr.secondary_node.is_a?(FamilyReunion::TopNode).should be_true
11
12
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 2
9
- version: 0.1.2
8
+ - 3
9
+ version: 0.1.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Dmitry Mozzherin
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-06-07 00:00:00 -04:00
18
+ date: 2011-06-14 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -233,7 +233,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
233
233
  requirements:
234
234
  - - ">="
235
235
  - !ruby/object:Gem::Version
236
- hash: 1186928035501677277
236
+ hash: 4425929328463574267
237
237
  segments:
238
238
  - 0
239
239
  version: "0"