name-spotter 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +2 -0
- data/lib/name-spotter.rb +7 -12
- data/lib/name-spotter/version.rb +1 -1
- data/name-spotter.gemspec +1 -1
- data/script/test_env +6 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26690e89eabc22553fc8e06a27505cd6d86c8e14
|
4
|
+
data.tar.gz: 6149a65867112650a84a4a3c2b90fdbf0d96594b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 909938befd5a65faef14316724c9cc2bee4320f2d6623f507b1fde9c8e520d47c7022782fa52b23f414b47dc78f0ef60f9c6d7714ffa6fe7f9763e58c6b9ffe8
|
7
|
+
data.tar.gz: 9a46a26c1a91de8d96795b1b9e2f9266ab592fc6d917ab77c740e0621d0fe979a8ef0eb1dc580c9318ec9c4679f6a7cd9e0f595bc9cc5bfa47c1a3e34ba25162
|
data/CHANGELOG
CHANGED
data/lib/name-spotter.rb
CHANGED
@@ -5,13 +5,12 @@ require "json"
|
|
5
5
|
require "nokogiri"
|
6
6
|
require "socket"
|
7
7
|
require "unicode_utils"
|
8
|
-
require
|
9
|
-
require File.join(File.dirname(__FILE__),
|
8
|
+
require "unsupervised-language-detection"
|
9
|
+
require File.join(File.dirname(__FILE__), "name-spotter", "client")
|
10
10
|
|
11
11
|
Dir["#{File.dirname(__FILE__)}/name-spotter/**/*.rb"].each {|f| require f}
|
12
12
|
|
13
13
|
class NameSpotter
|
14
|
-
|
15
14
|
def self.english?(text)
|
16
15
|
tweets = text.split(/\s+/).inject([]) do |res, w|
|
17
16
|
if w.match(/[A-Za-z]/)
|
@@ -23,10 +22,10 @@ class NameSpotter
|
|
23
22
|
end
|
24
23
|
res
|
25
24
|
end
|
26
|
-
eng, not_eng = tweets.shuffle[0...50].partition do |a|
|
25
|
+
eng, not_eng = tweets.shuffle[0...50].partition do |a|
|
27
26
|
UnsupervisedLanguageDetection.is_english_tweet?(a.join(" "))
|
28
27
|
end
|
29
|
-
percentage = eng.size.to_f/(not_eng.size + eng.size)
|
28
|
+
percentage = eng.size.to_f/(not_eng.size + eng.size)
|
30
29
|
percentage > 0.5
|
31
30
|
end
|
32
31
|
|
@@ -42,17 +41,15 @@ class NameSpotter
|
|
42
41
|
format == "json" ? to_json(names) : to_xml(names)
|
43
42
|
end
|
44
43
|
|
45
|
-
|
46
44
|
private
|
47
|
-
|
48
45
|
def to_text(input)
|
49
46
|
input
|
50
47
|
end
|
51
|
-
|
48
|
+
|
52
49
|
def to_json(names)
|
53
50
|
return JSON.fast_generate({ names: names })
|
54
51
|
end
|
55
|
-
|
52
|
+
|
56
53
|
def to_xml(names)
|
57
54
|
builder = Nokogiri::XML::Builder.new do |xml|
|
58
55
|
xml.names do
|
@@ -61,11 +58,9 @@ class NameSpotter
|
|
61
58
|
xml.scientificName name[:scientificName]
|
62
59
|
xml.offsetStart name[:offsetStart]
|
63
60
|
xml.offsetEnd name[:offsetEnd]
|
64
|
-
end
|
61
|
+
end
|
65
62
|
end
|
66
63
|
end
|
67
64
|
builder.to_xml
|
68
65
|
end
|
69
|
-
|
70
66
|
end
|
71
|
-
|
data/lib/name-spotter/version.rb
CHANGED
data/name-spotter.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |gem|
|
|
20
20
|
gem.add_runtime_dependency "rake", "~> 10.5"
|
21
21
|
gem.add_runtime_dependency "rest-client", "~> 1.8"
|
22
22
|
gem.add_runtime_dependency "nokogiri", "~> 1.6"
|
23
|
-
gem.add_runtime_dependency "builder", "~> 3.
|
23
|
+
gem.add_runtime_dependency "builder", "~> 3.1"
|
24
24
|
gem.add_runtime_dependency "json", "~> 1.8"
|
25
25
|
gem.add_runtime_dependency "unicode_utils", "~> 1.4"
|
26
26
|
gem.add_runtime_dependency "unsupervised-language-detection", "~> 0.0.6"
|
data/script/test_env
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-spotter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Goddard
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-
|
14
|
+
date: 2016-03-01 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rake
|
@@ -61,14 +61,14 @@ dependencies:
|
|
61
61
|
requirements:
|
62
62
|
- - "~>"
|
63
63
|
- !ruby/object:Gem::Version
|
64
|
-
version: '3.
|
64
|
+
version: '3.1'
|
65
65
|
type: :runtime
|
66
66
|
prerelease: false
|
67
67
|
version_requirements: !ruby/object:Gem::Requirement
|
68
68
|
requirements:
|
69
69
|
- - "~>"
|
70
70
|
- !ruby/object:Gem::Version
|
71
|
-
version: '3.
|
71
|
+
version: '3.1'
|
72
72
|
- !ruby/object:Gem::Dependency
|
73
73
|
name: json
|
74
74
|
requirement: !ruby/object:Gem::Requirement
|
@@ -181,6 +181,7 @@ files:
|
|
181
181
|
- lib/name-spotter/taxon_finder_client.rb
|
182
182
|
- lib/name-spotter/version.rb
|
183
183
|
- name-spotter.gemspec
|
184
|
+
- script/test_env
|
184
185
|
- spec/files/english.txt
|
185
186
|
- spec/files/journalofentomol13pomo_0018.txt
|
186
187
|
- spec/files/journalofentomol13pomo_0063.txt
|