name-spotter 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +2 -0
- data/lib/name-spotter.rb +7 -12
- data/lib/name-spotter/version.rb +1 -1
- data/name-spotter.gemspec +1 -1
- data/script/test_env +6 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26690e89eabc22553fc8e06a27505cd6d86c8e14
|
4
|
+
data.tar.gz: 6149a65867112650a84a4a3c2b90fdbf0d96594b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 909938befd5a65faef14316724c9cc2bee4320f2d6623f507b1fde9c8e520d47c7022782fa52b23f414b47dc78f0ef60f9c6d7714ffa6fe7f9763e58c6b9ffe8
|
7
|
+
data.tar.gz: 9a46a26c1a91de8d96795b1b9e2f9266ab592fc6d917ab77c740e0621d0fe979a8ef0eb1dc580c9318ec9c4679f6a7cd9e0f595bc9cc5bfa47c1a3e34ba25162
|
data/CHANGELOG
CHANGED
data/lib/name-spotter.rb
CHANGED
@@ -5,13 +5,12 @@ require "json"
|
|
5
5
|
require "nokogiri"
|
6
6
|
require "socket"
|
7
7
|
require "unicode_utils"
|
8
|
-
require
|
9
|
-
require File.join(File.dirname(__FILE__),
|
8
|
+
require "unsupervised-language-detection"
|
9
|
+
require File.join(File.dirname(__FILE__), "name-spotter", "client")
|
10
10
|
|
11
11
|
Dir["#{File.dirname(__FILE__)}/name-spotter/**/*.rb"].each {|f| require f}
|
12
12
|
|
13
13
|
class NameSpotter
|
14
|
-
|
15
14
|
def self.english?(text)
|
16
15
|
tweets = text.split(/\s+/).inject([]) do |res, w|
|
17
16
|
if w.match(/[A-Za-z]/)
|
@@ -23,10 +22,10 @@ class NameSpotter
|
|
23
22
|
end
|
24
23
|
res
|
25
24
|
end
|
26
|
-
eng, not_eng = tweets.shuffle[0...50].partition do |a|
|
25
|
+
eng, not_eng = tweets.shuffle[0...50].partition do |a|
|
27
26
|
UnsupervisedLanguageDetection.is_english_tweet?(a.join(" "))
|
28
27
|
end
|
29
|
-
percentage = eng.size.to_f/(not_eng.size + eng.size)
|
28
|
+
percentage = eng.size.to_f/(not_eng.size + eng.size)
|
30
29
|
percentage > 0.5
|
31
30
|
end
|
32
31
|
|
@@ -42,17 +41,15 @@ class NameSpotter
|
|
42
41
|
format == "json" ? to_json(names) : to_xml(names)
|
43
42
|
end
|
44
43
|
|
45
|
-
|
46
44
|
private
|
47
|
-
|
48
45
|
def to_text(input)
|
49
46
|
input
|
50
47
|
end
|
51
|
-
|
48
|
+
|
52
49
|
def to_json(names)
|
53
50
|
return JSON.fast_generate({ names: names })
|
54
51
|
end
|
55
|
-
|
52
|
+
|
56
53
|
def to_xml(names)
|
57
54
|
builder = Nokogiri::XML::Builder.new do |xml|
|
58
55
|
xml.names do
|
@@ -61,11 +58,9 @@ class NameSpotter
|
|
61
58
|
xml.scientificName name[:scientificName]
|
62
59
|
xml.offsetStart name[:offsetStart]
|
63
60
|
xml.offsetEnd name[:offsetEnd]
|
64
|
-
end
|
61
|
+
end
|
65
62
|
end
|
66
63
|
end
|
67
64
|
builder.to_xml
|
68
65
|
end
|
69
|
-
|
70
66
|
end
|
71
|
-
|
data/lib/name-spotter/version.rb
CHANGED
data/name-spotter.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |gem|
|
|
20
20
|
gem.add_runtime_dependency "rake", "~> 10.5"
|
21
21
|
gem.add_runtime_dependency "rest-client", "~> 1.8"
|
22
22
|
gem.add_runtime_dependency "nokogiri", "~> 1.6"
|
23
|
-
gem.add_runtime_dependency "builder", "~> 3.
|
23
|
+
gem.add_runtime_dependency "builder", "~> 3.1"
|
24
24
|
gem.add_runtime_dependency "json", "~> 1.8"
|
25
25
|
gem.add_runtime_dependency "unicode_utils", "~> 1.4"
|
26
26
|
gem.add_runtime_dependency "unsupervised-language-detection", "~> 0.0.6"
|
data/script/test_env
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: name-spotter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Goddard
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-
|
14
|
+
date: 2016-03-01 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rake
|
@@ -61,14 +61,14 @@ dependencies:
|
|
61
61
|
requirements:
|
62
62
|
- - "~>"
|
63
63
|
- !ruby/object:Gem::Version
|
64
|
-
version: '3.
|
64
|
+
version: '3.1'
|
65
65
|
type: :runtime
|
66
66
|
prerelease: false
|
67
67
|
version_requirements: !ruby/object:Gem::Requirement
|
68
68
|
requirements:
|
69
69
|
- - "~>"
|
70
70
|
- !ruby/object:Gem::Version
|
71
|
-
version: '3.
|
71
|
+
version: '3.1'
|
72
72
|
- !ruby/object:Gem::Dependency
|
73
73
|
name: json
|
74
74
|
requirement: !ruby/object:Gem::Requirement
|
@@ -181,6 +181,7 @@ files:
|
|
181
181
|
- lib/name-spotter/taxon_finder_client.rb
|
182
182
|
- lib/name-spotter/version.rb
|
183
183
|
- name-spotter.gemspec
|
184
|
+
- script/test_env
|
184
185
|
- spec/files/english.txt
|
185
186
|
- spec/files/journalofentomol13pomo_0018.txt
|
186
187
|
- spec/files/journalofentomol13pomo_0063.txt
|