treat 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +4 -4
- data/TODO +21 -54
- data/lib/economist/half_cocked_basel.txt +16 -0
- data/lib/economist/hose_and_dry.doc +0 -0
- data/lib/economist/hungarys_troubles.abw +70 -0
- data/lib/economist/republican_nomination.pdf +0 -0
- data/lib/economist/saving_the_euro.odt +0 -0
- data/lib/economist/to_infinity_and_beyond.txt +15 -0
- data/lib/economist/zero_sum.html +91 -0
- data/lib/treat.rb +58 -72
- data/lib/treat/buildable.rb +59 -15
- data/lib/treat/categories.rb +26 -14
- data/lib/treat/category.rb +2 -2
- data/lib/treat/delegatable.rb +65 -48
- data/lib/treat/doable.rb +44 -0
- data/lib/treat/entities.rb +34 -14
- data/lib/treat/entities/collection.rb +2 -0
- data/lib/treat/entities/document.rb +3 -2
- data/lib/treat/entities/entity.rb +105 -90
- data/lib/treat/entities/phrases.rb +17 -0
- data/lib/treat/entities/tokens.rb +28 -13
- data/lib/treat/entities/zones.rb +20 -0
- data/lib/treat/extractors.rb +49 -11
- data/lib/treat/extractors/coreferences/stanford.rb +68 -0
- data/lib/treat/extractors/date/chronic.rb +32 -0
- data/lib/treat/extractors/date/ruby.rb +25 -0
- data/lib/treat/extractors/keywords/tf_idf.rb +26 -0
- data/lib/treat/extractors/keywords/{topics_frequency.rb → topics_tf_idf.rb} +15 -7
- data/lib/treat/{detectors/language/language_detector.rb → extractors/language/language_extractor.rb} +5 -2
- data/lib/treat/extractors/language/what_language.rb +49 -0
- data/lib/treat/extractors/named_entity_tag/stanford.rb +53 -0
- data/lib/treat/extractors/roles/naive.rb +73 -0
- data/lib/treat/extractors/statistics/frequency_in.rb +6 -13
- data/lib/treat/extractors/statistics/{position_in_parent.rb → position_in.rb} +1 -1
- data/lib/treat/extractors/statistics/tf_idf.rb +89 -21
- data/lib/treat/extractors/statistics/transition_matrix.rb +11 -11
- data/lib/treat/extractors/statistics/transition_probability.rb +4 -4
- data/lib/treat/extractors/time/nickel.rb +30 -12
- data/lib/treat/extractors/topic_words/lda.rb +9 -9
- data/lib/treat/extractors/topics/reuters.rb +14 -15
- data/lib/treat/extractors/topics/reuters/region.xml +1 -0
- data/lib/treat/features.rb +7 -0
- data/lib/treat/formatters/readers/abw.rb +6 -1
- data/lib/treat/formatters/readers/autoselect.rb +5 -6
- data/lib/treat/formatters/readers/doc.rb +3 -1
- data/lib/treat/formatters/readers/html.rb +1 -1
- data/lib/treat/formatters/readers/image.rb +43 -0
- data/lib/treat/formatters/readers/odt.rb +1 -2
- data/lib/treat/formatters/readers/pdf.rb +9 -1
- data/lib/treat/formatters/readers/xml.rb +40 -0
- data/lib/treat/formatters/serializers/xml.rb +50 -14
- data/lib/treat/formatters/serializers/yaml.rb +7 -2
- data/lib/treat/formatters/unserializers/xml.rb +33 -7
- data/lib/treat/formatters/visualizers/dot.rb +90 -20
- data/lib/treat/formatters/visualizers/short_value.rb +2 -2
- data/lib/treat/formatters/visualizers/standoff.rb +2 -2
- data/lib/treat/formatters/visualizers/tree.rb +1 -1
- data/lib/treat/formatters/visualizers/txt.rb +13 -4
- data/lib/treat/group.rb +16 -10
- data/lib/treat/helpers/linguistics_loader.rb +18 -0
- data/lib/treat/inflectors.rb +10 -0
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
- data/lib/treat/inflectors/conjugations/linguistics.rb +5 -12
- data/lib/treat/inflectors/declensions/english.rb +319 -0
- data/lib/treat/inflectors/declensions/linguistics.rb +12 -11
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +3 -3
- data/lib/treat/install.rb +59 -0
- data/lib/treat/kernel.rb +18 -8
- data/lib/treat/languages.rb +18 -11
- data/lib/treat/languages/arabic.rb +4 -2
- data/lib/treat/languages/chinese.rb +6 -2
- data/lib/treat/languages/dutch.rb +16 -0
- data/lib/treat/languages/english.rb +47 -19
- data/lib/treat/languages/french.rb +8 -5
- data/lib/treat/languages/german.rb +9 -6
- data/lib/treat/languages/greek.rb +16 -0
- data/lib/treat/languages/italian.rb +6 -3
- data/lib/treat/languages/polish.rb +16 -0
- data/lib/treat/languages/portuguese.rb +16 -0
- data/lib/treat/languages/russian.rb +16 -0
- data/lib/treat/languages/spanish.rb +16 -0
- data/lib/treat/languages/swedish.rb +16 -0
- data/lib/treat/languages/tags.rb +377 -0
- data/lib/treat/lexicalizers.rb +34 -23
- data/lib/treat/lexicalizers/category/from_tag.rb +17 -10
- data/lib/treat/lexicalizers/linkages/naive.rb +51 -51
- data/lib/treat/lexicalizers/synsets/wordnet.rb +5 -1
- data/lib/treat/lexicalizers/tag/brill.rb +35 -40
- data/lib/treat/lexicalizers/tag/lingua.rb +19 -14
- data/lib/treat/lexicalizers/tag/stanford.rb +59 -68
- data/lib/treat/lexicalizers/tag/tagger.rb +29 -0
- data/lib/treat/processors.rb +8 -8
- data/lib/treat/processors/chunkers/txt.rb +4 -4
- data/lib/treat/processors/parsers/enju.rb +114 -99
- data/lib/treat/processors/parsers/stanford.rb +109 -41
- data/lib/treat/processors/segmenters/punkt.rb +17 -18
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +9716 -0
- data/lib/treat/processors/segmenters/punkt/english.yaml +10340 -0
- data/lib/treat/processors/segmenters/punkt/french.yaml +43159 -0
- data/lib/treat/processors/segmenters/punkt/german.yaml +9572 -0
- data/lib/treat/processors/segmenters/punkt/greek.yaml +6050 -0
- data/lib/treat/processors/segmenters/punkt/italian.yaml +14748 -0
- data/lib/treat/processors/segmenters/punkt/polish.yaml +9751 -0
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +13662 -0
- data/lib/treat/processors/segmenters/punkt/russian.yaml +4237 -0
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +24034 -0
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +10001 -0
- data/lib/treat/processors/segmenters/stanford.rb +38 -37
- data/lib/treat/processors/segmenters/tactful.rb +5 -4
- data/lib/treat/processors/tokenizers/macintyre.rb +7 -6
- data/lib/treat/processors/tokenizers/multilingual.rb +2 -3
- data/lib/treat/processors/tokenizers/perl.rb +2 -2
- data/lib/treat/processors/tokenizers/punkt.rb +6 -2
- data/lib/treat/processors/tokenizers/stanford.rb +25 -24
- data/lib/treat/processors/tokenizers/tactful.rb +1 -2
- data/lib/treat/proxies.rb +2 -35
- data/lib/treat/registrable.rb +17 -22
- data/lib/treat/sugar.rb +11 -11
- data/lib/treat/tree.rb +27 -17
- data/lib/treat/viewable.rb +29 -0
- data/lib/treat/visitable.rb +1 -1
- data/test/tc_entity.rb +56 -49
- data/test/tc_extractors.rb +41 -18
- data/test/tc_formatters.rb +7 -8
- data/test/tc_inflectors.rb +19 -24
- data/test/tc_lexicalizers.rb +12 -19
- data/test/tc_processors.rb +26 -12
- data/test/tc_resources.rb +2 -7
- data/test/tc_treat.rb +20 -22
- data/test/tc_tree.rb +4 -4
- data/test/tests.rb +3 -5
- data/test/texts.rb +13 -14
- data/tmp/INFO +1 -0
- metadata +78 -158
- data/bin/INFO +0 -1
- data/examples/benchmark.rb +0 -81
- data/examples/keywords.rb +0 -148
- data/lib/treat/detectors.rb +0 -31
- data/lib/treat/detectors/encoding/r_chardet19.rb +0 -27
- data/lib/treat/detectors/format/file.rb +0 -36
- data/lib/treat/detectors/language/what_language.rb +0 -29
- data/lib/treat/entities/constituents.rb +0 -15
- data/lib/treat/entities/sentence.rb +0 -8
- data/lib/treat/extractors/named_entity/abner.rb +0 -20
- data/lib/treat/extractors/named_entity/stanford.rb +0 -174
- data/lib/treat/extractors/statistics/frequency_of.rb +0 -15
- data/lib/treat/extractors/time/chronic.rb +0 -20
- data/lib/treat/extractors/time/native.rb +0 -18
- data/lib/treat/formatters/readers/gocr.rb +0 -26
- data/lib/treat/formatters/readers/ocropus.rb +0 -31
- data/lib/treat/formatters/visualizers/html.rb +0 -13
- data/lib/treat/formatters/visualizers/inspect.rb +0 -20
- data/lib/treat/inflectors/declensions/en.rb +0 -18
- data/lib/treat/languages/categories.rb +0 -5
- data/lib/treat/languages/english/categories.rb +0 -23
- data/lib/treat/languages/english/tags.rb +0 -352
- data/lib/treat/languages/xinhua.rb +0 -12
- data/lib/treat/lexicalizers/synsets/rita_wn.rb +0 -23
- data/lib/treat/string.rb +0 -5
- data/test/tc_detectors.rb +0 -26
data/test/tc_resources.rb
CHANGED
@@ -3,8 +3,8 @@ module Treat
|
|
3
3
|
class TestLanguages < Test::Unit::TestCase
|
4
4
|
|
5
5
|
def test_languages
|
6
|
-
assert_equal :eng, Treat::Languages.
|
7
|
-
assert_equal :en, Treat::Languages.
|
6
|
+
assert_equal :eng, Treat::Languages.code(:english, 2)
|
7
|
+
assert_equal :en, Treat::Languages.code(:english, 1)
|
8
8
|
assert_equal :english, Treat::Languages.describe(:eng)
|
9
9
|
assert_equal :english, Treat::Languages.describe(:en)
|
10
10
|
end
|
@@ -17,11 +17,6 @@ module Treat
|
|
17
17
|
|
18
18
|
end
|
19
19
|
|
20
|
-
def test_edges
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
20
|
end
|
25
|
-
|
26
21
|
end
|
27
22
|
end
|
data/test/tc_treat.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
module Treat
|
2
2
|
module Tests
|
3
3
|
class TestTreat < Test::Unit::TestCase
|
4
|
-
|
4
|
+
|
5
5
|
def test_edulcoration
|
6
|
-
Treat.
|
7
|
-
assert_equal true, Treat.
|
6
|
+
Treat.sweeten!
|
7
|
+
assert_equal true, Treat.sweetened?
|
8
8
|
Treat::Entities.list.each do |klass|
|
9
9
|
next if klass == :symbol
|
10
10
|
assert_nothing_raised do
|
@@ -16,9 +16,9 @@ module Treat
|
|
16
16
|
raise
|
17
17
|
end
|
18
18
|
end
|
19
|
-
end
|
20
|
-
Treat.
|
21
|
-
assert_equal false, Treat.
|
19
|
+
end
|
20
|
+
Treat.unsweeten!
|
21
|
+
assert_equal false, Treat.sweetened?
|
22
22
|
Treat::Entities.list.each do |klass|
|
23
23
|
next if klass == :symbol
|
24
24
|
assert_raise(NoMethodError) do
|
@@ -26,37 +26,35 @@ module Treat
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
def test_modules_loaded?
|
31
31
|
['exception',
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
'inflectors',
|
32
|
+
'languages',
|
33
|
+
'entities',
|
34
|
+
'feature',
|
35
|
+
'category',
|
36
|
+
'group',
|
37
|
+
'formatters',
|
38
|
+
'processors',
|
39
|
+
'lexicalizers',
|
40
|
+
'extractors',
|
41
|
+
'inflectors',
|
43
42
|
'proxies'].each do |klass|
|
44
43
|
assert_nothing_raised do
|
45
44
|
Treat.const_get klass.capitalize
|
46
45
|
end
|
47
46
|
end
|
48
47
|
end
|
49
|
-
|
48
|
+
|
50
49
|
def test_paths
|
51
50
|
assert_not_nil Treat.lib
|
52
|
-
assert_not_nil Treat.bin
|
53
51
|
assert_not_nil Treat.test
|
54
52
|
end
|
55
|
-
|
53
|
+
|
56
54
|
def test_file_permissions
|
57
55
|
assert_equal true, File.writable?(Treat.lib + '/../tmp')
|
58
56
|
end
|
59
|
-
|
57
|
+
|
60
58
|
end
|
61
59
|
end
|
62
60
|
end
|
data/test/tc_tree.rb
CHANGED
@@ -8,7 +8,7 @@ module Treat
|
|
8
8
|
@leaf = Treat::Tree::Node.new('leaf node', 'leaf')
|
9
9
|
@root << @branch << @leaf
|
10
10
|
@root << @sibling
|
11
|
-
@leaf.
|
11
|
+
@leaf.link(@sibling, 'some dependency')
|
12
12
|
end
|
13
13
|
def test_branching
|
14
14
|
assert_equal 2, @root.children.size
|
@@ -50,9 +50,9 @@ module Treat
|
|
50
50
|
assert_equal true, @branch.has_parent?
|
51
51
|
assert_equal true, @leaf.has_parent?
|
52
52
|
|
53
|
-
assert_equal false, @root.
|
54
|
-
assert_equal false, @branch.
|
55
|
-
assert_equal true, @leaf.
|
53
|
+
assert_equal false, @root.has_dependencies?
|
54
|
+
assert_equal false, @branch.has_dependencies?
|
55
|
+
assert_equal true, @leaf.has_dependencies?
|
56
56
|
|
57
57
|
end
|
58
58
|
end
|
data/test/tests.rb
CHANGED
@@ -4,19 +4,17 @@ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
|
4
4
|
|
5
5
|
require 'treat'
|
6
6
|
|
7
|
-
|
8
|
-
# Treat.bin = '/ruby/nat/bin' # Remove for release
|
7
|
+
#$LOAD_PATH << '/ruby/gems/treat/test' # Remove for release
|
9
8
|
|
10
9
|
require 'texts'
|
11
10
|
|
11
|
+
# This is roughly in order of dependence.
|
12
12
|
require 'tc_treat'
|
13
13
|
require 'tc_tree'
|
14
14
|
require 'tc_entity'
|
15
15
|
require 'tc_resources'
|
16
|
-
|
17
|
-
require 'tc_detectors'
|
18
16
|
require 'tc_formatters'
|
19
17
|
require 'tc_inflectors'
|
20
18
|
require 'tc_lexicalizers'
|
21
19
|
require 'tc_processors'
|
22
|
-
require 'tc_extractors'
|
20
|
+
#require 'tc_extractors'
|
data/test/texts.rb
CHANGED
@@ -1,20 +1,19 @@
|
|
1
1
|
module Treat
|
2
2
|
module Tests
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
Number = Treat::Entities::Number(20)
|
4
|
+
module English
|
5
|
+
Collection = Treat::Entities::Collection "#{Treat.test}/texts/english"
|
6
|
+
LongDoc = Treat::Entities::Document "#{Treat.test}/texts/english/long.txt"
|
7
|
+
MediumDoc = Treat::Entities::Document "#{Treat.test}/texts/english/medium.txt"
|
8
|
+
ShortDoc = Treat::Entities::Document "#{Treat.test}/texts/english/short.txt"
|
9
|
+
Time = Treat::Entities::Phrase 'every Tuesday at 3:00'
|
10
|
+
Date = Treat::Entities::Phrase '2011/02/01'
|
11
|
+
Sentence = Treat::Entities::Sentence 'The quick brown fox jumped over the lazy dog.'
|
12
|
+
Verb = Treat::Entities::Word 'run'
|
13
|
+
Word = Treat::Entities::Word 'running'
|
14
|
+
Noun = Treat::Entities::Word 'captain'
|
15
|
+
Number = Treat::Entities::Number 20
|
16
|
+
end
|
18
17
|
|
19
18
|
end
|
20
19
|
end
|
data/tmp/INFO
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
This is a folder for temporary files created by Treat.
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-02-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rjb
|
16
|
-
requirement: &
|
16
|
+
requirement: &70310096172480 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,21 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70310096172480
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
27
|
-
requirement: &
|
28
|
-
none: false
|
29
|
-
requirements:
|
30
|
-
- - ! '>='
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '0'
|
33
|
-
type: :runtime
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: *70243259829420
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: chronic
|
38
|
-
requirement: &70243259828180 !ruby/object:Gem::Requirement
|
26
|
+
name: zip
|
27
|
+
requirement: &70310096171240 !ruby/object:Gem::Requirement
|
39
28
|
none: false
|
40
29
|
requirements:
|
41
30
|
- - ! '>='
|
@@ -43,10 +32,10 @@ dependencies:
|
|
43
32
|
version: '0'
|
44
33
|
type: :runtime
|
45
34
|
prerelease: false
|
46
|
-
version_requirements: *
|
35
|
+
version_requirements: *70310096171240
|
47
36
|
- !ruby/object:Gem::Dependency
|
48
37
|
name: hpricot
|
49
|
-
requirement: &
|
38
|
+
requirement: &70310096170020 !ruby/object:Gem::Requirement
|
50
39
|
none: false
|
51
40
|
requirements:
|
52
41
|
- - ! '>='
|
@@ -54,10 +43,10 @@ dependencies:
|
|
54
43
|
version: '0'
|
55
44
|
type: :runtime
|
56
45
|
prerelease: false
|
57
|
-
version_requirements: *
|
46
|
+
version_requirements: *70310096170020
|
58
47
|
- !ruby/object:Gem::Dependency
|
59
|
-
name:
|
60
|
-
requirement: &
|
48
|
+
name: nokogiri
|
49
|
+
requirement: &70310096168860 !ruby/object:Gem::Requirement
|
61
50
|
none: false
|
62
51
|
requirements:
|
63
52
|
- - ! '>='
|
@@ -65,10 +54,10 @@ dependencies:
|
|
65
54
|
version: '0'
|
66
55
|
type: :runtime
|
67
56
|
prerelease: false
|
68
|
-
version_requirements: *
|
57
|
+
version_requirements: *70310096168860
|
69
58
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
71
|
-
requirement: &
|
59
|
+
name: psych
|
60
|
+
requirement: &70310096166960 !ruby/object:Gem::Requirement
|
72
61
|
none: false
|
73
62
|
requirements:
|
74
63
|
- - ! '>='
|
@@ -76,43 +65,10 @@ dependencies:
|
|
76
65
|
version: '0'
|
77
66
|
type: :runtime
|
78
67
|
prerelease: false
|
79
|
-
version_requirements: *
|
68
|
+
version_requirements: *70310096166960
|
80
69
|
- !ruby/object:Gem::Dependency
|
81
70
|
name: whatlanguage
|
82
|
-
requirement: &
|
83
|
-
none: false
|
84
|
-
requirements:
|
85
|
-
- - ! '>='
|
86
|
-
- !ruby/object:Gem::Version
|
87
|
-
version: '0'
|
88
|
-
type: :runtime
|
89
|
-
prerelease: false
|
90
|
-
version_requirements: *70243259837640
|
91
|
-
- !ruby/object:Gem::Dependency
|
92
|
-
name: wordnet
|
93
|
-
requirement: &70243259835240 !ruby/object:Gem::Requirement
|
94
|
-
none: false
|
95
|
-
requirements:
|
96
|
-
- - ! '>='
|
97
|
-
- !ruby/object:Gem::Version
|
98
|
-
version: '0'
|
99
|
-
type: :runtime
|
100
|
-
prerelease: false
|
101
|
-
version_requirements: *70243259835240
|
102
|
-
- !ruby/object:Gem::Dependency
|
103
|
-
name: rbtagger
|
104
|
-
requirement: &70243259832320 !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
|
-
requirements:
|
107
|
-
- - ! '>='
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: '0'
|
110
|
-
type: :runtime
|
111
|
-
prerelease: false
|
112
|
-
version_requirements: *70243259832320
|
113
|
-
- !ruby/object:Gem::Dependency
|
114
|
-
name: engtagger
|
115
|
-
requirement: &70243259839180 !ruby/object:Gem::Requirement
|
71
|
+
requirement: &70310096163460 !ruby/object:Gem::Requirement
|
116
72
|
none: false
|
117
73
|
requirements:
|
118
74
|
- - ! '>='
|
@@ -120,54 +76,10 @@ dependencies:
|
|
120
76
|
version: '0'
|
121
77
|
type: :runtime
|
122
78
|
prerelease: false
|
123
|
-
version_requirements: *
|
124
|
-
- !ruby/object:Gem::Dependency
|
125
|
-
name: punkt-segmenter
|
126
|
-
requirement: &70243259836880 !ruby/object:Gem::Requirement
|
127
|
-
none: false
|
128
|
-
requirements:
|
129
|
-
- - ! '>='
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0'
|
132
|
-
type: :runtime
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: *70243259836880
|
135
|
-
- !ruby/object:Gem::Dependency
|
136
|
-
name: tokenizer
|
137
|
-
requirement: &70243259834100 !ruby/object:Gem::Requirement
|
138
|
-
none: false
|
139
|
-
requirements:
|
140
|
-
- - ! '>='
|
141
|
-
- !ruby/object:Gem::Version
|
142
|
-
version: '0'
|
143
|
-
type: :runtime
|
144
|
-
prerelease: false
|
145
|
-
version_requirements: *70243259834100
|
146
|
-
- !ruby/object:Gem::Dependency
|
147
|
-
name: tactful_tokenizer
|
148
|
-
requirement: &70243259830340 !ruby/object:Gem::Requirement
|
149
|
-
none: false
|
150
|
-
requirements:
|
151
|
-
- - ! '>='
|
152
|
-
- !ruby/object:Gem::Version
|
153
|
-
version: '0'
|
154
|
-
type: :runtime
|
155
|
-
prerelease: false
|
156
|
-
version_requirements: *70243259830340
|
157
|
-
- !ruby/object:Gem::Dependency
|
158
|
-
name: english
|
159
|
-
requirement: &70243259828860 !ruby/object:Gem::Requirement
|
160
|
-
none: false
|
161
|
-
requirements:
|
162
|
-
- - ! '>='
|
163
|
-
- !ruby/object:Gem::Version
|
164
|
-
version: '0'
|
165
|
-
type: :runtime
|
166
|
-
prerelease: false
|
167
|
-
version_requirements: *70243259828860
|
79
|
+
version_requirements: *70310096163460
|
168
80
|
- !ruby/object:Gem::Dependency
|
169
81
|
name: linguistics
|
170
|
-
requirement: &
|
82
|
+
requirement: &70310096159580 !ruby/object:Gem::Requirement
|
171
83
|
none: false
|
172
84
|
requirements:
|
173
85
|
- - ! '>='
|
@@ -175,10 +87,10 @@ dependencies:
|
|
175
87
|
version: '0'
|
176
88
|
type: :runtime
|
177
89
|
prerelease: false
|
178
|
-
version_requirements: *
|
90
|
+
version_requirements: *70310096159580
|
179
91
|
- !ruby/object:Gem::Dependency
|
180
|
-
name:
|
181
|
-
requirement: &
|
92
|
+
name: stanford-core-nlp
|
93
|
+
requirement: &70310096158080 !ruby/object:Gem::Requirement
|
182
94
|
none: false
|
183
95
|
requirements:
|
184
96
|
- - ! '>='
|
@@ -186,10 +98,10 @@ dependencies:
|
|
186
98
|
version: '0'
|
187
99
|
type: :runtime
|
188
100
|
prerelease: false
|
189
|
-
version_requirements: *
|
101
|
+
version_requirements: *70310096158080
|
190
102
|
- !ruby/object:Gem::Dependency
|
191
|
-
name:
|
192
|
-
requirement: &
|
103
|
+
name: punkt-segmenter
|
104
|
+
requirement: &70310096156900 !ruby/object:Gem::Requirement
|
193
105
|
none: false
|
194
106
|
requirements:
|
195
107
|
- - ! '>='
|
@@ -197,10 +109,10 @@ dependencies:
|
|
197
109
|
version: '0'
|
198
110
|
type: :runtime
|
199
111
|
prerelease: false
|
200
|
-
version_requirements: *
|
112
|
+
version_requirements: *70310096156900
|
201
113
|
- !ruby/object:Gem::Dependency
|
202
114
|
name: lda-ruby
|
203
|
-
requirement: &
|
115
|
+
requirement: &70310096155740 !ruby/object:Gem::Requirement
|
204
116
|
none: false
|
205
117
|
requirements:
|
206
118
|
- - ! '>='
|
@@ -208,10 +120,10 @@ dependencies:
|
|
208
120
|
version: '0'
|
209
121
|
type: :runtime
|
210
122
|
prerelease: false
|
211
|
-
version_requirements: *
|
123
|
+
version_requirements: *70310096155740
|
212
124
|
- !ruby/object:Gem::Dependency
|
213
|
-
name:
|
214
|
-
requirement: &
|
125
|
+
name: chronic
|
126
|
+
requirement: &70310096154280 !ruby/object:Gem::Requirement
|
215
127
|
none: false
|
216
128
|
requirements:
|
217
129
|
- - ! '>='
|
@@ -219,18 +131,7 @@ dependencies:
|
|
219
131
|
version: '0'
|
220
132
|
type: :runtime
|
221
133
|
prerelease: false
|
222
|
-
version_requirements: *
|
223
|
-
- !ruby/object:Gem::Dependency
|
224
|
-
name: unprof
|
225
|
-
requirement: &70243259805800 !ruby/object:Gem::Requirement
|
226
|
-
none: false
|
227
|
-
requirements:
|
228
|
-
- - ! '>='
|
229
|
-
- !ruby/object:Gem::Version
|
230
|
-
version: '0'
|
231
|
-
type: :development
|
232
|
-
prerelease: false
|
233
|
-
version_requirements: *70243259805800
|
134
|
+
version_requirements: *70310096154280
|
234
135
|
description: ! ' Treat is a toolkit for text retrieval, information extraction and
|
235
136
|
natural language processing. '
|
236
137
|
email:
|
@@ -239,35 +140,40 @@ executables: []
|
|
239
140
|
extensions: []
|
240
141
|
extra_rdoc_files: []
|
241
142
|
files:
|
143
|
+
- lib/economist/half_cocked_basel.txt
|
144
|
+
- lib/economist/hose_and_dry.doc
|
145
|
+
- lib/economist/hungarys_troubles.abw
|
146
|
+
- lib/economist/republican_nomination.pdf
|
147
|
+
- lib/economist/saving_the_euro.odt
|
148
|
+
- lib/economist/to_infinity_and_beyond.txt
|
149
|
+
- lib/economist/zero_sum.html
|
242
150
|
- lib/treat/buildable.rb
|
243
151
|
- lib/treat/categories.rb
|
244
152
|
- lib/treat/category.rb
|
245
153
|
- lib/treat/delegatable.rb
|
246
|
-
- lib/treat/
|
247
|
-
- lib/treat/detectors/format/file.rb
|
248
|
-
- lib/treat/detectors/language/language_detector.rb
|
249
|
-
- lib/treat/detectors/language/what_language.rb
|
250
|
-
- lib/treat/detectors.rb
|
154
|
+
- lib/treat/doable.rb
|
251
155
|
- lib/treat/entities/collection.rb
|
252
|
-
- lib/treat/entities/constituents.rb
|
253
156
|
- lib/treat/entities/document.rb
|
254
157
|
- lib/treat/entities/entity.rb
|
255
|
-
- lib/treat/entities/
|
158
|
+
- lib/treat/entities/phrases.rb
|
256
159
|
- lib/treat/entities/tokens.rb
|
257
160
|
- lib/treat/entities/zones.rb
|
258
161
|
- lib/treat/entities.rb
|
259
162
|
- lib/treat/exception.rb
|
260
|
-
- lib/treat/extractors/
|
261
|
-
- lib/treat/extractors/
|
262
|
-
- lib/treat/extractors/
|
163
|
+
- lib/treat/extractors/coreferences/stanford.rb
|
164
|
+
- lib/treat/extractors/date/chronic.rb
|
165
|
+
- lib/treat/extractors/date/ruby.rb
|
166
|
+
- lib/treat/extractors/keywords/tf_idf.rb
|
167
|
+
- lib/treat/extractors/keywords/topics_tf_idf.rb
|
168
|
+
- lib/treat/extractors/language/language_extractor.rb
|
169
|
+
- lib/treat/extractors/language/what_language.rb
|
170
|
+
- lib/treat/extractors/named_entity_tag/stanford.rb
|
171
|
+
- lib/treat/extractors/roles/naive.rb
|
263
172
|
- lib/treat/extractors/statistics/frequency_in.rb
|
264
|
-
- lib/treat/extractors/statistics/
|
265
|
-
- lib/treat/extractors/statistics/position_in_parent.rb
|
173
|
+
- lib/treat/extractors/statistics/position_in.rb
|
266
174
|
- lib/treat/extractors/statistics/tf_idf.rb
|
267
175
|
- lib/treat/extractors/statistics/transition_matrix.rb
|
268
176
|
- lib/treat/extractors/statistics/transition_probability.rb
|
269
|
-
- lib/treat/extractors/time/chronic.rb
|
270
|
-
- lib/treat/extractors/time/native.rb
|
271
177
|
- lib/treat/extractors/time/nickel.rb
|
272
178
|
- lib/treat/extractors/topic_words/lda/data.dat
|
273
179
|
- lib/treat/extractors/topic_words/lda/wiki.yml
|
@@ -278,63 +184,79 @@ files:
|
|
278
184
|
- lib/treat/extractors/topics/reuters.rb
|
279
185
|
- lib/treat/extractors.rb
|
280
186
|
- lib/treat/feature.rb
|
187
|
+
- lib/treat/features.rb
|
281
188
|
- lib/treat/formatters/readers/abw.rb
|
282
189
|
- lib/treat/formatters/readers/autoselect.rb
|
283
190
|
- lib/treat/formatters/readers/doc.rb
|
284
|
-
- lib/treat/formatters/readers/gocr.rb
|
285
191
|
- lib/treat/formatters/readers/html.rb
|
286
|
-
- lib/treat/formatters/readers/
|
192
|
+
- lib/treat/formatters/readers/image.rb
|
287
193
|
- lib/treat/formatters/readers/odt.rb
|
288
194
|
- lib/treat/formatters/readers/pdf.rb
|
289
195
|
- lib/treat/formatters/readers/txt.rb
|
196
|
+
- lib/treat/formatters/readers/xml.rb
|
290
197
|
- lib/treat/formatters/serializers/xml.rb
|
291
198
|
- lib/treat/formatters/serializers/yaml.rb
|
292
199
|
- lib/treat/formatters/unserializers/autoselect.rb
|
293
200
|
- lib/treat/formatters/unserializers/xml.rb
|
294
201
|
- lib/treat/formatters/unserializers/yaml.rb
|
295
202
|
- lib/treat/formatters/visualizers/dot.rb
|
296
|
-
- lib/treat/formatters/visualizers/html.rb
|
297
|
-
- lib/treat/formatters/visualizers/inspect.rb
|
298
203
|
- lib/treat/formatters/visualizers/short_value.rb
|
299
204
|
- lib/treat/formatters/visualizers/standoff.rb
|
300
205
|
- lib/treat/formatters/visualizers/tree.rb
|
301
206
|
- lib/treat/formatters/visualizers/txt.rb
|
302
207
|
- lib/treat/formatters.rb
|
303
208
|
- lib/treat/group.rb
|
209
|
+
- lib/treat/helpers/linguistics_loader.rb
|
304
210
|
- lib/treat/inflectors/cardinal_words/linguistics.rb
|
305
211
|
- lib/treat/inflectors/conjugations/linguistics.rb
|
306
|
-
- lib/treat/inflectors/declensions/
|
212
|
+
- lib/treat/inflectors/declensions/english.rb
|
307
213
|
- lib/treat/inflectors/declensions/linguistics.rb
|
308
214
|
- lib/treat/inflectors/ordinal_words/linguistics.rb
|
309
215
|
- lib/treat/inflectors/stem/porter.rb
|
310
216
|
- lib/treat/inflectors/stem/porter_c.rb
|
311
217
|
- lib/treat/inflectors/stem/uea.rb
|
312
218
|
- lib/treat/inflectors.rb
|
219
|
+
- lib/treat/install.rb
|
313
220
|
- lib/treat/kernel.rb
|
314
221
|
- lib/treat/languages/arabic.rb
|
315
|
-
- lib/treat/languages/categories.rb
|
316
222
|
- lib/treat/languages/chinese.rb
|
317
|
-
- lib/treat/languages/
|
318
|
-
- lib/treat/languages/english/tags.rb
|
223
|
+
- lib/treat/languages/dutch.rb
|
319
224
|
- lib/treat/languages/english.rb
|
320
225
|
- lib/treat/languages/french.rb
|
321
226
|
- lib/treat/languages/german.rb
|
227
|
+
- lib/treat/languages/greek.rb
|
322
228
|
- lib/treat/languages/italian.rb
|
323
229
|
- lib/treat/languages/list.txt
|
324
|
-
- lib/treat/languages/
|
230
|
+
- lib/treat/languages/polish.rb
|
231
|
+
- lib/treat/languages/portuguese.rb
|
232
|
+
- lib/treat/languages/russian.rb
|
233
|
+
- lib/treat/languages/spanish.rb
|
234
|
+
- lib/treat/languages/swedish.rb
|
235
|
+
- lib/treat/languages/tags.rb
|
325
236
|
- lib/treat/languages.rb
|
326
237
|
- lib/treat/lexicalizers/category/from_tag.rb
|
327
238
|
- lib/treat/lexicalizers/linkages/naive.rb
|
328
|
-
- lib/treat/lexicalizers/synsets/rita_wn.rb
|
329
239
|
- lib/treat/lexicalizers/synsets/wordnet.rb
|
330
240
|
- lib/treat/lexicalizers/tag/brill.rb
|
331
241
|
- lib/treat/lexicalizers/tag/lingua.rb
|
332
242
|
- lib/treat/lexicalizers/tag/stanford.rb
|
243
|
+
- lib/treat/lexicalizers/tag/tagger.rb
|
333
244
|
- lib/treat/lexicalizers.rb
|
334
245
|
- lib/treat/object.rb
|
335
246
|
- lib/treat/processors/chunkers/txt.rb
|
336
247
|
- lib/treat/processors/parsers/enju.rb
|
337
248
|
- lib/treat/processors/parsers/stanford.rb
|
249
|
+
- lib/treat/processors/segmenters/punkt/dutch.yaml
|
250
|
+
- lib/treat/processors/segmenters/punkt/english.yaml
|
251
|
+
- lib/treat/processors/segmenters/punkt/french.yaml
|
252
|
+
- lib/treat/processors/segmenters/punkt/german.yaml
|
253
|
+
- lib/treat/processors/segmenters/punkt/greek.yaml
|
254
|
+
- lib/treat/processors/segmenters/punkt/italian.yaml
|
255
|
+
- lib/treat/processors/segmenters/punkt/polish.yaml
|
256
|
+
- lib/treat/processors/segmenters/punkt/portuguese.yaml
|
257
|
+
- lib/treat/processors/segmenters/punkt/russian.yaml
|
258
|
+
- lib/treat/processors/segmenters/punkt/spanish.yaml
|
259
|
+
- lib/treat/processors/segmenters/punkt/swedish.yaml
|
338
260
|
- lib/treat/processors/segmenters/punkt.rb
|
339
261
|
- lib/treat/processors/segmenters/stanford.rb
|
340
262
|
- lib/treat/processors/segmenters/tactful.rb
|
@@ -347,13 +269,12 @@ files:
|
|
347
269
|
- lib/treat/processors.rb
|
348
270
|
- lib/treat/proxies.rb
|
349
271
|
- lib/treat/registrable.rb
|
350
|
-
- lib/treat/string.rb
|
351
272
|
- lib/treat/sugar.rb
|
352
273
|
- lib/treat/tree.rb
|
274
|
+
- lib/treat/viewable.rb
|
353
275
|
- lib/treat/visitable.rb
|
354
276
|
- lib/treat.rb
|
355
277
|
- test/profile.rb
|
356
|
-
- test/tc_detectors.rb
|
357
278
|
- test/tc_entity.rb
|
358
279
|
- test/tc_extractors.rb
|
359
280
|
- test/tc_formatters.rb
|
@@ -375,9 +296,7 @@ files:
|
|
375
296
|
- test/texts/english/short.txt
|
376
297
|
- test/texts/english/zero_sum.html
|
377
298
|
- test/texts.rb
|
378
|
-
-
|
379
|
-
- examples/keywords.rb
|
380
|
-
- bin/INFO
|
299
|
+
- tmp/INFO
|
381
300
|
- README
|
382
301
|
- TODO
|
383
302
|
- LICENSE
|
@@ -407,3 +326,4 @@ signing_key:
|
|
407
326
|
specification_version: 3
|
408
327
|
summary: Text retrieval, extraction and annotation toolkit
|
409
328
|
test_files: []
|
329
|
+
has_rdoc:
|