synonym-finder 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.rvmrc +1 -0
- data/Gemfile +19 -0
- data/Gemfile.lock +72 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +64 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/features/step_definitions/synonym-finder_steps.rb +0 -0
- data/features/support/env.rb +13 -0
- data/features/synonym-finder.feature +9 -0
- data/lib/synonym-finder.rb +148 -0
- data/lib/synonym-finder/duplicate_finder.rb +87 -0
- data/lib/synonym-finder/group_organizer.rb +121 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/support/input.rb +32 -0
- data/spec/synonym-finder_spec.rb +32 -0
- data/synonym-finder.gemspec +85 -0
- metadata +181 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use ruby-1.9.2-p290@synfnd --create
|
data/Gemfile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
gem 'sqlite3'
|
6
|
+
gem 'taxamatch_rb'
|
7
|
+
gem 'biodiversity19'
|
8
|
+
gem 'ruby-stemmer'
|
9
|
+
|
10
|
+
# Add dependencies to develop your gem here.
|
11
|
+
# Include everything needed to run rake, tests, features, etc.
|
12
|
+
group :development do
|
13
|
+
gem "ruby-debug19"
|
14
|
+
gem "rspec", "~> 2.3.0"
|
15
|
+
gem "cucumber", ">= 0"
|
16
|
+
gem "bundler", "~> 1.0.0"
|
17
|
+
gem "jeweler", "~> 1.6.0"
|
18
|
+
gem "rcov", ">= 0"
|
19
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
archive-tar-minitar (0.5.2)
|
5
|
+
biodiversity (0.7.3)
|
6
|
+
json
|
7
|
+
treetop
|
8
|
+
biodiversity19 (0.7.3)
|
9
|
+
treetop
|
10
|
+
builder (3.0.0)
|
11
|
+
columnize (0.3.3)
|
12
|
+
cucumber (1.0.2)
|
13
|
+
builder (>= 2.1.2)
|
14
|
+
diff-lcs (>= 1.1.2)
|
15
|
+
gherkin (~> 2.4.5)
|
16
|
+
json (>= 1.4.6)
|
17
|
+
term-ansicolor (>= 1.0.5)
|
18
|
+
diff-lcs (1.1.2)
|
19
|
+
gherkin (2.4.5)
|
20
|
+
json (>= 1.4.6)
|
21
|
+
git (1.2.5)
|
22
|
+
jeweler (1.6.4)
|
23
|
+
bundler (~> 1.0)
|
24
|
+
git (>= 1.2.5)
|
25
|
+
rake
|
26
|
+
json (1.5.3)
|
27
|
+
linecache19 (0.5.12)
|
28
|
+
ruby_core_source (>= 0.1.4)
|
29
|
+
polyglot (0.3.1)
|
30
|
+
rake (0.9.2)
|
31
|
+
rcov (0.9.9)
|
32
|
+
rspec (2.3.0)
|
33
|
+
rspec-core (~> 2.3.0)
|
34
|
+
rspec-expectations (~> 2.3.0)
|
35
|
+
rspec-mocks (~> 2.3.0)
|
36
|
+
rspec-core (2.3.1)
|
37
|
+
rspec-expectations (2.3.0)
|
38
|
+
diff-lcs (~> 1.1.2)
|
39
|
+
rspec-mocks (2.3.0)
|
40
|
+
ruby-debug-base19 (0.11.25)
|
41
|
+
columnize (>= 0.3.1)
|
42
|
+
linecache19 (>= 0.5.11)
|
43
|
+
ruby_core_source (>= 0.1.4)
|
44
|
+
ruby-debug19 (0.11.6)
|
45
|
+
columnize (>= 0.3.1)
|
46
|
+
linecache19 (>= 0.5.11)
|
47
|
+
ruby-debug-base19 (>= 0.11.19)
|
48
|
+
ruby-stemmer (0.9.1)
|
49
|
+
ruby_core_source (0.1.5)
|
50
|
+
archive-tar-minitar (>= 0.5.2)
|
51
|
+
sqlite3 (1.3.3)
|
52
|
+
taxamatch_rb (0.7.6)
|
53
|
+
biodiversity (~> 0.7.3)
|
54
|
+
biodiversity19 (~> 0.7.3)
|
55
|
+
term-ansicolor (1.0.5)
|
56
|
+
treetop (1.4.9)
|
57
|
+
polyglot (>= 0.3.1)
|
58
|
+
|
59
|
+
PLATFORMS
|
60
|
+
ruby
|
61
|
+
|
62
|
+
DEPENDENCIES
|
63
|
+
biodiversity19
|
64
|
+
bundler (~> 1.0.0)
|
65
|
+
cucumber
|
66
|
+
jeweler (~> 1.6.0)
|
67
|
+
rcov
|
68
|
+
rspec (~> 2.3.0)
|
69
|
+
ruby-debug19
|
70
|
+
ruby-stemmer
|
71
|
+
sqlite3
|
72
|
+
taxamatch_rb
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Dmitry Mozzherin
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
= synonym-finder
|
2
|
+
|
3
|
+
Synonym finder is a biodiversity tool for finding homotypic nomenclatural synonyms in taxonomic hierarchies.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
gem install synonym-finder
|
8
|
+
|
9
|
+
== Usage
|
10
|
+
|
11
|
+
# Prepare input for the gem. It understands the following array of hashes as input:
|
12
|
+
|
13
|
+
input = [
|
14
|
+
{id: 001, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys porcata (Emery, 1896)"},
|
15
|
+
{id: 003, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys triangularis (Mayr, 1887)"},
|
16
|
+
{id: 004, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys triangularis var. alba Brown 1992"},
|
17
|
+
{id: 005, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys triangularis var. borealis Brown 1992"},
|
18
|
+
{id: 100, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Nylanderia", name: "Nylanderia porcata"}, #match 001, no authorship
|
19
|
+
{id: 101, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Nylanderia", name: "Nylanderia porcatum Emery, 1896"},
|
20
|
+
{id: 102, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Nylanderia", name: "Nylanderia porcatum"}...]
|
21
|
+
|
22
|
+
# please note that id can be a number or a string
|
23
|
+
|
24
|
+
require 'synonym-finder'
|
25
|
+
|
26
|
+
sf = SynonymFinder.new(input)
|
27
|
+
output = sf.find_matches
|
28
|
+
|
29
|
+
# output has the following format:
|
30
|
+
# matched putative synonyms are collected into groups of different types
|
31
|
+
#
|
32
|
+
# [ {:type=>"chresonym", :name_ids=>[203, 204]},
|
33
|
+
# {:type=>"alt_placement", :name_ids=>[400, 600]},
|
34
|
+
# {:type=>"chresonym", :name_ids=>[101, 102]},
|
35
|
+
# {:type=>"homotypic", :name_ids=>[203, 303]},
|
36
|
+
# {:type=>"lexical_variant", :name_ids=>[800, 803]},
|
37
|
+
# {:type=>"lexical_variant", :name_ids=>[801, 802]},
|
38
|
+
# {:type=>"homotypic", :name_ids=>[202, 302]},
|
39
|
+
# {:type=>"homotypic", :name_ids=>[1, 101]},
|
40
|
+
# {:type=>"misplaced_synonym", :name_ids=>[801, 803, 802, 800]}]
|
41
|
+
|
42
|
+
== Synonym types
|
43
|
+
|
44
|
+
* homotypic -- possible placement of species to a different genus
|
45
|
+
* alt_placement -- possibly the same name (i.e. genus moved to a different family)
|
46
|
+
* chresonym -- different authorship for the same canonical form, both having the same parent
|
47
|
+
* lexical_variant -- The same parent and genus, but species epithet suffix is different (for example change of the epithet gender)
|
48
|
+
* misplaced_synonym -- The same parent, matching species epithet, but genus varies; usually happens if a synonym is located at the same level as species.
|
49
|
+
|
50
|
+
== Contributing to synonym-finder
|
51
|
+
|
52
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
53
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
54
|
+
* Fork the project
|
55
|
+
* Start a feature/bugfix branch
|
56
|
+
* Commit and push until you are happy with your contribution
|
57
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
58
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
59
|
+
|
60
|
+
== Copyright
|
61
|
+
|
62
|
+
Copyright (c) 2011 Dmitry Mozzherin. See LICENSE.txt for
|
63
|
+
further details.
|
64
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "synonym-finder"
|
18
|
+
gem.homepage = "http://github.com/dimus/synonym-finder"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Synonym finder is a biodiversity tool for finding homotypic nomenclatural synonyms in taxonomic hierarchies.}
|
21
|
+
gem.description = %Q{Synonym finder is a biodiversity tool for finding homotypic nomenclatural synonyms in taxonomic hierarchies.}
|
22
|
+
gem.email = "dmozzherin@gmail.com"
|
23
|
+
gem.authors = ["Dmitry Mozzherin"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
spec.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
require 'cucumber/rake/task'
|
40
|
+
Cucumber::Rake::Task.new(:features)
|
41
|
+
|
42
|
+
task :default => :spec
|
43
|
+
|
44
|
+
require 'rake/rdoctask'
|
45
|
+
Rake::RDocTask.new do |rdoc|
|
46
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
47
|
+
|
48
|
+
rdoc.rdoc_dir = 'rdoc'
|
49
|
+
rdoc.title = "synonym-finder #{version}"
|
50
|
+
rdoc.rdoc_files.include('README*')
|
51
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
52
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.2
|
File without changes
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup(:default, :development)
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
11
|
+
require 'synonym-finder'
|
12
|
+
|
13
|
+
require 'rspec/expectations'
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'json'
|
3
|
+
require 'sqlite3'
|
4
|
+
require 'taxamatch_rb'
|
5
|
+
require 'lingua/stemmer'
|
6
|
+
|
7
|
+
Dir["#{File.dirname(__FILE__)}/synonym-finder/**/*.rb"].each {|f| require f}
|
8
|
+
|
9
|
+
class SynonymFinder
|
10
|
+
NO_AUTH_INFO = 10
|
11
|
+
PARTIAL_AUTH_INFO = 20
|
12
|
+
AUTH_MATCH = 100
|
13
|
+
AUTH_NO_MATCH = 0
|
14
|
+
|
15
|
+
attr :input, :db, :matches, :part_matches
|
16
|
+
|
17
|
+
# Returns the logger shared by every SynonymFinder. The first call
# creates one with Logger.new(nil) when none has been assigned yet.
def self.logger
  @@logger ||= Logger.new(nil)
end

# Replaces the shared logger with +logger+.
def self.logger=(logger)
  @@logger = logger
end

# Swaps the shared logger for a fresh Logger.new(nil).
def self.logger_reset
  self.logger = Logger.new(nil)
end

# Writes "|obj_id|message|" to the shared logger.
#
# obj_id  - identifier of the object that reports (callers pass object_id).
# message - text to log.
# method  - name of the logger method to call, :info by default.
def self.logger_write(obj_id, message, method = :info)
  line = "|#{obj_id}|#{message}|"
  logger.send(method, line)
end
|
32
|
+
|
33
|
+
|
34
|
+
# Prepares a finder for +input+ and loads the names into the working database.
#
# input     - array of hashes; build_tree reads the :id, :path and :name
#             keys of every element.
# in_memory - handed to init_db; +true+ selects an in-memory SQLite
#             database, any other value the file-backed one.
#
# Names are ingested only when the names table is empty, so a database
# that already holds rows is reused as it is.
def initialize(input, in_memory = true)
  @input = input
  @matches = {}
  @part_matches = {}
  @atomizer = Taxamatch::Atomizer.new
  @tm = Taxamatch::Base.new
  @stemmer = Lingua::Stemmer.new(:language => "latin")
  @db = init_db(in_memory)
  #tmp_populate
  stored_names = @db.execute("select count(*) from names")[0][0].to_i
  build_tree unless stored_names > 0
  @duplicate_finder = DuplicateFinder.new(self)
  @group_organizer = GroupOrganizer.new(self)
end
|
47
|
+
|
48
|
+
# Runs the whole matching pipeline and returns the grouped result.
#
# threshold - maximum path distance handed to
#             DuplicateFinder#species_epithet_duplicates (default 5).
#
# Steps: canonical-form duplicates, species-epithet duplicates, authorship
# scoring, sorting into @matches / @part_matches, then grouping. The return
# value is whatever GroupOrganizer#organize returns.
def find_matches(threshold = 5)
  @duplicate_finder.canonical_duplicates
  candidates = @duplicate_finder.species_epithet_duplicates(threshold)
  clean_up(compare_authorship(candidates))
  @group_organizer.organize
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# Sorts scored candidate pairs into @matches and @part_matches.
#
# matches - hash of pair key => info hash carrying :type and :auth_match.
#
# Chresonyms are always kept in @matches. Any other pair is dropped when
# its :auth_match is below 20, kept in @matches when it equals 100 and
# kept in @part_matches otherwise. Returns +matches+.
def clean_up(matches)
  matches.each do |pair_ids, info|
    is_chresonym = info[:type] == :chresonym
    next unless is_chresonym || info[:auth_match] >= 20
    if is_chresonym || info[:auth_match] == 100
      @matches[pair_ids] = info
    else
      @part_matches[pair_ids] = info
    end
  end
end
|
64
|
+
|
65
|
+
# Scores every candidate pair by comparing the authorship of its two names.
#
# matches - hash of [id1, id2] => info hash; each info hash receives an
#           :auth_match entry.
#
# :auth_match is NO_AUTH_INFO when neither name has authors or years,
# PARTIAL_AUTH_INFO when only one of them has, and otherwise AUTH_MATCH or
# AUTH_NO_MATCH depending on whether @tm.match_authors returns non-zero.
# Returns +matches+.
def compare_authorship(matches)
  SynonymFinder.logger_write(self.object_id, "Matching authorship")
  count = 0
  matches.each do |key, value|
    count += 1
    SynonymFinder.logger_write(self.object_id, "Matching authors %s" % count) if count % 1000 == 0
    # The two ids are bound rather than interpolated so that string ids
    # produce valid SQL as well.
    res = @db.execute("select authors, years from names where id in (?, ?)", key)
    # authors/years are the Marshal dumps build_tree stored for these rows.
    data1, data2 = res.map do |authors, years|
      {:all_authors => Marshal.load(authors), :all_years => Marshal.load(years)}
    end
    info1 = data1[:all_authors] + data1[:all_years]
    info2 = data2[:all_authors] + data2[:all_years]
    value[:auth_match] =
      if info1.empty? && info2.empty?
        NO_AUTH_INFO
      elsif info1.empty? || info2.empty?
        PARTIAL_AUTH_INFO
      elsif @tm.match_authors(data1, data2) == 0
        AUTH_NO_MATCH
      else
        AUTH_MATCH
      end
  end
  matches
end
|
85
|
+
|
86
|
+
# Parses every input row and stores it in the names and name_parts tables.
#
# Rows whose name cannot be parsed (the parser raises or returns nothing)
# or that have no species part are skipped silently. Authors and years are
# stored as Marshal dumps; name_parts receives the path, the canonical
# form ("Genus species ..."), the epithet string and its stemmed form.
def build_tree
  SynonymFinder.logger_write(self.object_id, "Ingesting data")
  @input.each_with_index do |row, index|
    position = index + 1
    SynonymFinder.logger_write(self.object_id, "Ingesting record %s" % position) if position % 1000 == 0
    atomized_name = begin
      @atomizer.parse(row[:name])
    rescue
      nil
    end
    next unless atomized_name && atomized_name[:species]
    species_string = get_species(atomized_name)
    canonical_name = atomized_name[:genus][:string] + " " + species_string
    name_row = [row[:id], row[:name], Marshal.dump(atomized_name[:all_authors]), Marshal.dump(atomized_name[:all_years])]
    @db.execute("insert into names (id, name, authors, years) values (?, ?, ?, ?)", name_row)
    parts_row = [row[:id], row[:path], canonical_name, species_string, stem_epithet(species_string)]
    @db.execute("insert into name_parts (name_id, path, canonical, epithet, epithet_stem) values (?, ?, ?, ?, ?)", parts_row)
  end
end
|
99
|
+
|
100
|
+
# Opens the working SQLite database and returns the handle.
#
# in_memory - exactly +true+ opens a fresh ":memory:" database and creates
#             the tables in it; any other value opens /tmp/syn_finder.sql
#             and creates the tables only when that file did not exist
#             before it was opened.
def init_db(in_memory)
  if in_memory == true
    db = SQLite3::Database.new( ":memory:" )
    create_tables(db)
    return db
  end
  db_file = "/tmp/syn_finder.sql"
  # Existence has to be checked before the database is opened.
  schema_missing = !File.exist?(db_file)
  db = SQLite3::Database.new(db_file)
  create_tables(db) if schema_missing
  db
end
|
114
|
+
|
115
|
+
# Creates the tables and indexes the finder works with on +db+.
#
# The statements run in the listed order; the value returned by the last
# db.execute call is returned.
def create_tables(db)
  statements = [
    "create table names (id string primary key, name string, authors, years)",
    # "create table paths (id integer primary key autoincrement, path)",
    # "create table paths_names (path_id integer, name_id string, level integer, primary key (path_id, name_id))",
    "create table name_parts (name_id string, path string, canonical string, epithet string, epithet_stem string)",
    "create index idx_name_parts_1 on name_parts (canonical)",
    "create index idx_name_parts_2 on name_parts (epithet_stem)",
    "create table groups (id integer primary key, type)",
    "create table names_groups (name_id integer, group_id integer, score_max integer, score_sum integer, score_num integer, primary key (name_id, group_id))",
    "create index idx_names_groups_2 on names_groups (group_id)"
  ]
  statements.map { |sql| db.execute(sql) }.last
end
|
126
|
+
|
127
|
+
# Builds the epithet string of a parsed name: the species epithet followed
# by every infraspecies epithet (when present), joined with single spaces.
#
# atomized_name - hash with a :species hash and an optional :infraspecies
#                 list of hashes, each carrying a :string entry.
def get_species(atomized_name)
  epithets = [atomized_name[:species][:string]]
  infraspecies = atomized_name[:infraspecies]
  epithets.concat(infraspecies.map { |rank| rank[:string] }) if infraspecies
  epithets.join(" ")
end
|
132
|
+
|
133
|
+
# Stems every space-separated word of +epithet+ with @stemmer and joins
# the stems back together with single spaces.
def stem_epithet(epithet)
  words = epithet.split(" ")
  stems = words.map { |word| @stemmer.stem(word) }
  stems.join(" ")
end
|
136
|
+
|
137
|
+
# Development helper: replays the INSERT statements found in /tmp/dump.sql
# against @db, printing a progress line every 10000 lines read.
#
# The dump is opened in block form so the file handle is closed when the
# method finishes, including when a statement raises.
def tmp_populate
  File.open("/tmp/dump.sql") do |f|
    f.each_with_index do |line, i|
      i += 1
      puts "loading from dump line %s" % i if i % 10000 == 0
      # Only lines containing INSERT are executed; everything else is ignored.
      @db.execute(line.strip) if line =~ /INSERT/
    end
  end
end
|
147
|
+
|
148
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
class SynonymFinder
|
2
|
+
class DuplicateFinder
|
3
|
+
|
4
|
+
# synonym_finder - the owning SynonymFinder; its db handle is reused here.
#
# Starts with an empty @matches hash that the duplicate searches fill.
def initialize(synonym_finder)
  @matches = {}
  @synonym_finder = synonym_finder
  @db = synonym_finder.db
end
|
9
|
+
|
10
|
+
# Pairs up names that share a canonical form and labels every pair
# collected so far.
#
# A pair whose total path distance is 0 becomes a :chresonym, any other
# pair an :alt_placement. Returns the accumulated @matches hash.
def canonical_duplicates
  SynonymFinder.logger_write(@synonym_finder.object_id, "Processing canonical forms")
  repeated = @db.execute("select canonical from name_parts group by canonical having count(*) > 1")
  repeated.each_with_index do |row, index|
    candidate = index + 1
    SynonymFinder.logger_write(@synonym_finder.object_id, "Processing canonical form candidate %s" % candidate) if candidate % 100 == 0
    # The one-column result row is passed on as the bind list.
    find_pairs(@db.execute("select name_id, path from name_parts where canonical = ?", row))
  end
  @matches.each_value do |info|
    info[:type] = info[:total_distance] == 0 ? :chresonym : :alt_placement
  end
  @matches
end
|
27
|
+
|
28
|
+
# Records every pair of +names+ in @matches together with its path distance.
#
# names     - rows of [name_id, path].
# threshold - 0 (the default) accepts every pair; otherwise only pairs
#             whose distance does not exceed it are recorded.
#
# A pair that is already present in @matches is left untouched.
def find_pairs(names, threshold = 0)
  get_pairs(names).each do |(first_id, first_path), (second_id, second_path)|
    key = [first_id, second_id]
    total_distance = get_total_distance(first_path, second_path)
    next if @matches.has_key?(key)
    next unless threshold == 0 || total_distance <= threshold
    @matches[key] = {:total_distance => total_distance}
  end
end
|
37
|
+
|
38
|
+
# Distance between two classification paths given as arrays of node names:
# the number of nodes left in both paths once their common leading part
# has been removed.
def get_total_distance(path1, path2)
  shared = path1.zip(path2).take_while { |left, right| left == right }.size
  path1.size + path2.size - shared * 2
end
|
47
|
+
|
48
|
+
# Returns every unordered pair of +names+.
#
# names - rows of [name_id, path]; the path is converted with to_s and
#         split on "|".
#
# Pairs are produced starting from the last row, each matched with the
# rows before it in their original order, and the two members of a pair
# are sorted. The argument itself is not modified.
def get_pairs(names)
  entries = names.map { |row| [row[0], row[1].to_s.split("|")] }
  (entries.size - 1).downto(1).flat_map do |last|
    entries[0...last].map { |earlier| [entries[last], earlier].sort }
  end
end
|
57
|
+
|
58
|
+
# Pairs up names that share a stemmed species epithet and assigns a type
# to every pair that has none yet.
#
# threshold_distance - maximum path distance passed to find_pairs.
#
# Typing rules for untyped pairs:
# * distance 0 and identical epithets                 -> :misplaced_synonym
# * distance 0, different epithets, same genus        -> :lexical_variant
# * distance 0, different epithets, different genera  -> :misplaced_synonym
# * any other distance                                -> :homotypic
#
# Returns the accumulated @matches hash.
def species_epithet_duplicates(threshold_distance)
  SynonymFinder.logger_write(@synonym_finder.object_id, "Processing species epithets")
  stems = @db.execute("select epithet_stem from name_parts group by epithet_stem having count(*) > 1")
  stems.each_with_index do |stem, i|
    i += 1
    SynonymFinder.logger_write(@synonym_finder.object_id, "Processing species epithet candidate %s" % i) if i % 100 == 0
    names = @db.execute("select name_id, path from name_parts where epithet_stem = ?", stem)
    find_pairs(names, threshold_distance)
  end
  count = 0
  SynonymFinder.logger_write(@synonym_finder.object_id, "Assigning type to found matches")
  @matches.each do |key, value|
    next if value.has_key?(:type)
    count += 1
    SynonymFinder.logger_write(@synonym_finder.object_id, "Processing match %s" % count) if count % 10000 == 0
    if value[:total_distance] != 0
      value[:type] = :homotypic
      next
    end
    # The pair's two ids are bound instead of interpolated so that string
    # ids produce valid SQL as well.
    epithets = @db.execute("select distinct epithet from name_parts where name_id in (?, ?)", key)
    if epithets.size == 1
      value[:type] = :misplaced_synonym
    else
      canonicals = @db.execute("select canonical from name_parts where name_id in (?, ?)", key)
      # The genus is the first word of the canonical form.
      genera = canonicals.map { |c| c[0].split(" ")[0] }.uniq
      value[:type] = genera.size == 1 ? :lexical_variant : :misplaced_synonym
    end
  end
  @matches
end
|
86
|
+
end
|
87
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
class SynonymFinder
|
2
|
+
class GroupOrganizer
|
3
|
+
|
4
|
+
# synonym_finder - the owning SynonymFinder; its db handle is reused here.
#
# @groups maps a name id to a hash of match type => group id and starts empty.
def initialize(synonym_finder)
  @groups = {}
  @synonym_finder = synonym_finder
  @db = synonym_finder.db
end
|
9
|
+
|
10
|
+
# Finds duplication groups for the matched names and returns them in the
# form produced by get_output. A name can belong to one or more
# duplication groups: chresonym, lexical variant, homotypic, alt placement.
def organize
  SynonymFinder.logger_write(@synonym_finder.object_id, "Grouping results")
  organize_matches
  #organize_partial_matches
  get_output
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Walks over the finder's matches and files every pair into a group of its
# match type. Group ids are handed out starting from 1.
#
# * neither name grouped yet -> a new group is created for the pair
# * both names grouped       -> the two groups are merged when they differ
#                               and the scores of both names are updated
# * only one name grouped    -> the other name joins that group
def organize_matches
  @last_id = 1
  @synonym_finder.matches.each_with_index do |(key, value), index|
    processed = index + 1
    SynonymFinder.logger_write(@synonym_finder.object_id, "Grouping match %s" % processed) if processed % 10000 == 0
    first_group = get_group(key[0], value[:type])
    second_group = get_group(key[1], value[:type])
    if !first_group && !second_group
      create_group(key, value)
    elsif first_group && second_group
      update_group(first_group, second_group) unless first_group == second_group
      key.each { |name_id| update_score(name_id, value) }
    else
      add_to_group(key, value)
    end
  end
end
|
38
|
+
|
39
|
+
# Files the finder's partial matches. When one name of a pair already
# belongs to a group of the pair's type, the other name is inserted into
# that group (the first name's group wins when both have one); when
# neither does, a new group is created for the pair.
#
# NOTE(review): +added+ keeps only the most recent partner per name because
# each assignment replaces the inner hash - confirm whether partners were
# meant to accumulate.
def organize_partial_matches
  added = {}
  @synonym_finder.part_matches.each_with_index do |(key, value), index|
    processed = index + 1
    SynonymFinder.logger_write(@synonym_finder.object_id, "Adding partial matches %s" % processed) if processed % 10000 == 0
    first_group = get_group(key[0], value[:type])
    second_group = get_group(key[1], value[:type])
    unless first_group || second_group
      create_group(key, value)
      next
    end
    if first_group
      group_id, name_id, name_id_db = first_group, key[1], key[0] #name without authorship
    else
      group_id, name_id, name_id_db = second_group, key[0], key[1] #name without authorship
    end
    next if added[name_id] && added[name_id][name_id_db]
    score = get_score(value)
    @db.execute("insert into names_groups (name_id, group_id, score_max, score_sum, score_num) values (?, ?, ?, ?, 1)", [name_id, group_id, score, score])
    added[name_id] = { name_id_db => 1 }
  end
end
|
59
|
+
|
60
|
+
# Returns the id of the group of match type +type+ that +name_id+ belongs
# to, or nil when the name has no such group.
def get_group(name_id, type)
  by_type = @groups[name_id]
  by_type ? by_type[type] : nil
end
|
64
|
+
|
65
|
+
# Opens a new group for the pair +key+ using @last_id as the group id.
#
# key   - the pair's two name ids.
# value - the pair's match info; :type names the group type and get_score
#         supplies the initial score of both names.
#
# Both names are registered in @groups and in names_groups, after which
# @last_id is advanced.
def create_group(key, value)
  type = value[:type]
  group_id = @last_id
  @db.execute("insert into groups (id, type) values (?, ?)", [group_id, type.to_s])
  key.each { |name_id| @groups[name_id] = {} unless @groups.has_key?(name_id) }
  score = get_score(value)
  [key[1], key[0]].each { |name_id| @groups[name_id][type] = group_id }
  [key[0], key[1]].each do |name_id|
    @db.execute("insert into names_groups (name_id, group_id, score_max, score_sum, score_num) values (?, ?, ?, ?, 1)", [name_id, group_id, score, score])
  end
  @last_id += 1
end
|
74
|
+
|
75
|
+
# Merges group +gr2+ into group +gr1+.
#
# The names_groups rows of gr2 are moved to gr1 and the gr2 row is removed
# from groups. The in-memory @groups map is remapped as well; without that
# step later get_group lookups would keep returning the deleted gr2 id.
# Returns the result of the delete statement.
def update_group(gr1, gr2)
  @db.execute("update names_groups set group_id = ? where group_id = ?", [gr1, gr2])
  deleted = @db.execute("delete from groups where id = ?", gr2)
  @groups.each_value do |by_type|
    # Only values of existing keys are replaced, which is safe while iterating.
    by_type.each { |type, group_id| by_type[type] = gr1 if group_id == gr2 }
  end
  deleted
end
|
79
|
+
|
80
|
+
# Adds the still ungrouped name of the pair +key+ to the group its partner
# already belongs to (for the pair's match type).
#
# The partner's score is updated first; the newcomer is then registered in
# @groups and inserted into names_groups with the pair's score.
def add_to_group(key, value)
  type = value[:type]
  first_group = get_group(key[0], type)
  second_group = get_group(key[1], type)
  if first_group
    newcomer, member, group_id = key[1], key[0], first_group
  else
    newcomer, member, group_id = key[0], key[1], second_group
  end
  update_score(member, value)
  score = get_score(value)
  @groups[newcomer] = {} unless @groups.has_key?(newcomer)
  @groups[newcomer][type] = group_id
  @db.execute("insert into names_groups (name_id, group_id, score_max, score_sum, score_num) values (?, ?, ?, ?, 1)", [newcomer, group_id, score, score])
end
|
90
|
+
|
91
|
+
# Folds the score of one more match into the names_groups row of +name_id+
# for the group of the match's type: score_max keeps the larger value,
# score_sum grows by the score and score_num by one.
def update_score(name_id, value)
  score = get_score(value)
  group_id = get_group(name_id, value[:type])
  binds = [score, score, name_id, group_id]
  @db.execute("update names_groups set score_max = max(score_max, ?), score_sum = score_sum + ?, score_num = score_num + 1 where name_id = ? and group_id = ?", binds)
end
|
96
|
+
|
97
|
+
# Score a match contributes to its group.
#
# value - match info hash with :type, :total_distance and :auth_match.
#
# Chresonyms always score 100. An alt_placement whose paths are more than
# 8 steps apart scores 10. Every other match scores its :auth_match value.
#
# The distance rule reads :type and :total_distance, the keys the match
# hashes actually carry; it previously looked up :alt_placement and
# :total_length, which are never set, so the rule could not fire.
def get_score(value)
  return 100 if value[:type] == :chresonym
  return 10 if value[:type] == :alt_placement && value[:total_distance] > 8
  value[:auth_match]
end
|
102
|
+
|
103
|
+
# Reads the grouped names back from the database and returns them as an
# array of { :type => group type, :name_ids => [ids] } hashes, one per
# run of consecutive rows sharing a group id.
#
# The group still being collected when the rows run out is appended after
# the loop; without that step the final group was missing from the result.
def get_output
  data = @db.execute("select x.group_id, g.type, ng.name_id from (select group_id from names_groups group by group_id order by count(*), group_id) x join names_groups ng on x.group_id = ng.group_id join names n on n.id = ng.name_id join groups g on g.id = ng.group_id")
  res = []
  current_id = nil
  current_group = nil
  data.each do |group_id, type, name_id|
    if current_group && group_id == current_id
      current_group[:name_ids] << name_id
    else
      res << current_group if current_group
      current_id = group_id
      current_group = { :type => type, :name_ids => [name_id] }
    end
  end
  res << current_group if current_group
  res
end
|
119
|
+
|
120
|
+
end
|
121
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
require 'ostruct'
|
5
|
+
require 'synonym-finder'
|
6
|
+
|
7
|
+
# Requires supporting files with custom matchers and macros, etc,
|
8
|
+
# in ./support/ and its subdirectories.
|
9
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
10
|
+
|
11
|
+
RSpec.configure do |config|
|
12
|
+
|
13
|
+
end
|
14
|
+
|
15
|
+
class SynonymFinder::Spec
|
16
|
+
Config = OpenStruct.new(
|
17
|
+
:input => INPUT
|
18
|
+
)
|
19
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
class SynonymFinder::Spec
|
2
|
+
|
3
|
+
INPUT = [
|
4
|
+
{id: 001, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys porcata (Emery, 1896)"},
|
5
|
+
{id: 003, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys triangularis (Mayr, 1887)"},
|
6
|
+
{id: 004, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys triangularis var. alba Brown 1992"},
|
7
|
+
{id: 005, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Gnamptogenys", name: "Gnamptogenys triangularis var. borealis Brown 1992"},
|
8
|
+
{id: 100, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Nylanderia", name: "Nylanderia porcata"}, #match 001, no authorhsip
|
9
|
+
{id: 101, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Nylanderia", name: "Nylanderia porcatum Emery, 1896"}, #match 001 by stem
|
10
|
+
{id: 102, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Nylanderia", name: "Nylanderia porcatum"}, #match 001 by stem
|
11
|
+
{id: 200, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Brachymyrmex", name: "Brachymyrmex obscurior Forel, 1893"},
|
12
|
+
{id: 201, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Brachymyrmex", name: "Brachymyrmex brevicornis Emery, 1906"},
|
13
|
+
{id: 202, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Brachymyrmex", name: "Brachymyrmex patagonicus Mayr, 1868"},
|
14
|
+
{id: 203, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Brachymyrmex", name: "Brachymyrmex minutus Forel, 1893"},
|
15
|
+
{id: 204, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Brachymyrmex", name: "Brachymyrmex minutus Brown, 2010"}, #chresonym match with 203
|
16
|
+
{id: 205, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Brachymyrmex", name: "Brachymyrmex micropeda Forel, 1893"},
|
17
|
+
{id: 300, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Neobrachymyrmex", name: "Neobrachymyrmex obscurior"}, #match 200 no auth
|
18
|
+
{id: 301, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Neobrachymyrmex", name: "Neobrachymyrmex brevicornis"}, #match 201 no auth
|
19
|
+
{id: 302, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Neobrachymyrmex", name: "Neobrachymyrmex patagonicus Mayr, 1868"}, #match 203 auth
|
20
|
+
{id: 303, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Neobrachymyrmex", name: "Neobrachymyrmex minutus Forel"}, #match 204 no auth (part)
|
21
|
+
{id: 304, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Neobrachymyrmex", name: "Neobrachymyrmex micropeda Brown 1995"}, #match 205 no auth
|
22
|
+
{id: 400, path: "Animalia|Athropoda|Insecta|Hymenoptera|Formicidae|Crematogaster", name: "Crematogaster obscurata Emery, 1895"},
|
23
|
+
{id: 500, path: "Animalia|Athropoda|Insecta|Hymenoptera|Tiphiidae|Diamma", name: "Diamma obscurata (Emery, 1895)"}, #match 2 degrees 400 auth
|
24
|
+
{id: 600, path: "Animalia|Athropoda|Insecta|Hymenoptera|Tiphiidae|Crematogaster", name: "Crematogaster obscurata Em. 1895"}, #full name match
|
25
|
+
{id: 700, path: "Animalia|Something1|Something2|Something3|Something4|Somename", name: "Somename obscurata Emery"}, #distance over threshold
|
26
|
+
{id: 800, path: "Animalia|Athropoda|Insecta|Hymenoptera|Tiphiidae|Neobrachymyrmex", name: "Neobrachymyrmex obscurata (Emery, 1895)"}, #match 2 degrees 400 auth
|
27
|
+
{id: 801, path: "Animalia|Athropoda|Insecta|Hymenoptera|Tiphiidae|Neobrachymyrmex", name: "Brachymyrmex obscuratum (Emery, 1895)"}, #misplaced synonym by stem 800
|
28
|
+
{id: 802, path: "Animalia|Athropoda|Insecta|Hymenoptera|Tiphiidae|Neobrachymyrmex", name: "Brachymyrmex obscurata (Emery, 1895)"}, #misplaced synonym by epithet 800
|
29
|
+
{id: 803, path: "Animalia|Athropoda|Insecta|Hymenoptera|Tiphiidae|Neobrachymyrmex", name: "Neobrachymyrmex obscuratum (Emery, 1895)"}, #lex var by epithet 800
|
30
|
+
]
|
31
|
+
|
32
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "SynonymFinder" do
|
4
|
+
before(:all) do
|
5
|
+
SynonymFinder.logger = Logger.new($stdout)
|
6
|
+
@sf = SynonymFinder.new(SynonymFinder::Spec::Config.input)
|
7
|
+
# @sf = SynonymFinder.new(open(File.dirname(__FILE__) + "/support/union_data.txt").read)
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should able to ingest input in correct format" do
|
11
|
+
@sf.input.is_a?(Array).should be_true
|
12
|
+
@sf.input[0].keys.should == [:id, :path, :name]
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should be able to find species epithet duplications" do
|
16
|
+
output = @sf.find_matches
|
17
|
+
m = @sf.matches
|
18
|
+
m[[1, 100]].should be_nil # 1 name has no auth
|
19
|
+
m[[1, 101]].should == {:total_distance=>2, :type=>:homotypic, :auth_match=>100}
|
20
|
+
m[[1, 102]].should be_nil # 1 name has no auth
|
21
|
+
m[[203, 204]].should == {:total_distance=>0, :type=>:chresonym, :auth_match=>0}
|
22
|
+
m[[202, 302]].should == {:total_distance=>2, :type=>:homotypic, :auth_match=>100}
|
23
|
+
m[[400, 500]].should == {:total_distance=>4, :type=>:homotypic, :auth_match=>100}
|
24
|
+
m[[400, 600]].should == {:total_distance=>4, :type=>:alt_placement, :auth_match=>100}
|
25
|
+
m[[400, 700]].should be_nil
|
26
|
+
m[[400, 800]].should == {:total_distance=>4, :type=>:homotypic, :auth_match=>100}
|
27
|
+
m[[800, 801]].should == {:total_distance=>0, :type=>:misplaced_synonym, :auth_match=>100}
|
28
|
+
m[[800, 802]].should == {:total_distance=>0, :type=>:misplaced_synonym, :auth_match=>100}
|
29
|
+
m[[800, 803]].should == {:total_distance=>0, :type=>:lexical_variant, :auth_match=>100}
|
30
|
+
output.should == [{:type=>"chresonym", :name_ids=>[203, 204]}, {:type=>"alt_placement", :name_ids=>[400, 600]}, {:type=>"chresonym", :name_ids=>[101, 102]}, {:type=>"homotypic", :name_ids=>[203, 303]}, {:type=>"lexical_variant", :name_ids=>[800, 803]}, {:type=>"lexical_variant", :name_ids=>[801, 802]}, {:type=>"homotypic", :name_ids=>[202, 302]}, {:type=>"homotypic", :name_ids=>[1, 101]}, {:type=>"misplaced_synonym", :name_ids=>[801, 803, 802, 800]}]
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{synonym-finder}
|
8
|
+
s.version = "0.2.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = [%q{Dmitry Mozzherin}]
|
12
|
+
s.date = %q{2011-08-12}
|
13
|
+
s.description = %q{Synonym finder is a biodiversity tool for finding homotypic nomenclatural synonyms in taxonomic hierarchies.}
|
14
|
+
s.email = %q{dmozzherin@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
".rvmrc",
|
23
|
+
"Gemfile",
|
24
|
+
"Gemfile.lock",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.rdoc",
|
27
|
+
"Rakefile",
|
28
|
+
"VERSION",
|
29
|
+
"features/step_definitions/synonym-finder_steps.rb",
|
30
|
+
"features/support/env.rb",
|
31
|
+
"features/synonym-finder.feature",
|
32
|
+
"lib/synonym-finder.rb",
|
33
|
+
"lib/synonym-finder/duplicate_finder.rb",
|
34
|
+
"lib/synonym-finder/group_organizer.rb",
|
35
|
+
"spec/spec_helper.rb",
|
36
|
+
"spec/support/input.rb",
|
37
|
+
"spec/synonym-finder_spec.rb",
|
38
|
+
"synonym-finder.gemspec"
|
39
|
+
]
|
40
|
+
s.homepage = %q{http://github.com/dimus/synonym-finder}
|
41
|
+
s.licenses = [%q{MIT}]
|
42
|
+
s.require_paths = [%q{lib}]
|
43
|
+
s.rubygems_version = %q{1.8.6}
|
44
|
+
s.summary = %q{Synonym finder is a biodiversity tool for finding homotypic nomenclatural synonyms in taxonomic hierarchies.}
|
45
|
+
|
46
|
+
if s.respond_to? :specification_version then
|
47
|
+
s.specification_version = 3
|
48
|
+
|
49
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
50
|
+
s.add_runtime_dependency(%q<sqlite3>, [">= 0"])
|
51
|
+
s.add_runtime_dependency(%q<taxamatch_rb>, [">= 0"])
|
52
|
+
s.add_runtime_dependency(%q<biodiversity19>, [">= 0"])
|
53
|
+
s.add_runtime_dependency(%q<ruby-stemmer>, [">= 0"])
|
54
|
+
s.add_development_dependency(%q<ruby-debug19>, [">= 0"])
|
55
|
+
s.add_development_dependency(%q<rspec>, ["~> 2.3.0"])
|
56
|
+
s.add_development_dependency(%q<cucumber>, [">= 0"])
|
57
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.0"])
|
59
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<sqlite3>, [">= 0"])
|
62
|
+
s.add_dependency(%q<taxamatch_rb>, [">= 0"])
|
63
|
+
s.add_dependency(%q<biodiversity19>, [">= 0"])
|
64
|
+
s.add_dependency(%q<ruby-stemmer>, [">= 0"])
|
65
|
+
s.add_dependency(%q<ruby-debug19>, [">= 0"])
|
66
|
+
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
67
|
+
s.add_dependency(%q<cucumber>, [">= 0"])
|
68
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
69
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
|
70
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
71
|
+
end
|
72
|
+
else
|
73
|
+
s.add_dependency(%q<sqlite3>, [">= 0"])
|
74
|
+
s.add_dependency(%q<taxamatch_rb>, [">= 0"])
|
75
|
+
s.add_dependency(%q<biodiversity19>, [">= 0"])
|
76
|
+
s.add_dependency(%q<ruby-stemmer>, [">= 0"])
|
77
|
+
s.add_dependency(%q<ruby-debug19>, [">= 0"])
|
78
|
+
s.add_dependency(%q<rspec>, ["~> 2.3.0"])
|
79
|
+
s.add_dependency(%q<cucumber>, [">= 0"])
|
80
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
81
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.0"])
|
82
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
metadata
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: synonym-finder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dmitry Mozzherin
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-08-12 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: sqlite3
|
16
|
+
requirement: &70213009815740 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70213009815740
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: taxamatch_rb
|
27
|
+
requirement: &70213009815220 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70213009815220
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: biodiversity19
|
38
|
+
requirement: &70213009814740 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70213009814740
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: ruby-stemmer
|
49
|
+
requirement: &70213009814220 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70213009814220
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: ruby-debug19
|
60
|
+
requirement: &70213009813740 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *70213009813740
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: &70213009813260 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ~>
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: 2.3.0
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *70213009813260
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: cucumber
|
82
|
+
requirement: &70213009812780 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *70213009812780
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: bundler
|
93
|
+
requirement: &70213009812300 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ~>
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: 1.0.0
|
99
|
+
type: :development
|
100
|
+
prerelease: false
|
101
|
+
version_requirements: *70213009812300
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: jeweler
|
104
|
+
requirement: &70213009811820 !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.6.0
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: *70213009811820
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: rcov
|
115
|
+
requirement: &70213009811340 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
type: :development
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *70213009811340
|
124
|
+
description: Synonym finder is a biodiversity tool for finding homotypic nomenclatural
|
125
|
+
synonyms in taxonomic hierarchies.
|
126
|
+
email: dmozzherin@gmail.com
|
127
|
+
executables: []
|
128
|
+
extensions: []
|
129
|
+
extra_rdoc_files:
|
130
|
+
- LICENSE.txt
|
131
|
+
- README.rdoc
|
132
|
+
files:
|
133
|
+
- .document
|
134
|
+
- .rspec
|
135
|
+
- .rvmrc
|
136
|
+
- Gemfile
|
137
|
+
- Gemfile.lock
|
138
|
+
- LICENSE.txt
|
139
|
+
- README.rdoc
|
140
|
+
- Rakefile
|
141
|
+
- VERSION
|
142
|
+
- features/step_definitions/synonym-finder_steps.rb
|
143
|
+
- features/support/env.rb
|
144
|
+
- features/synonym-finder.feature
|
145
|
+
- lib/synonym-finder.rb
|
146
|
+
- lib/synonym-finder/duplicate_finder.rb
|
147
|
+
- lib/synonym-finder/group_organizer.rb
|
148
|
+
- spec/spec_helper.rb
|
149
|
+
- spec/support/input.rb
|
150
|
+
- spec/synonym-finder_spec.rb
|
151
|
+
- synonym-finder.gemspec
|
152
|
+
homepage: http://github.com/dimus/synonym-finder
|
153
|
+
licenses:
|
154
|
+
- MIT
|
155
|
+
post_install_message:
|
156
|
+
rdoc_options: []
|
157
|
+
require_paths:
|
158
|
+
- lib
|
159
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
160
|
+
none: false
|
161
|
+
requirements:
|
162
|
+
- - ! '>='
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
segments:
|
166
|
+
- 0
|
167
|
+
hash: 2631280896359598637
|
168
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ! '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
requirements: []
|
175
|
+
rubyforge_project:
|
176
|
+
rubygems_version: 1.8.6
|
177
|
+
signing_key:
|
178
|
+
specification_version: 3
|
179
|
+
summary: Synonym finder is a biodiversity tool for finding homotypic nomenclatural
|
180
|
+
synonyms in taxonomic hierarchies.
|
181
|
+
test_files: []
|