treat 1.0.6 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -4
- data/README.md +13 -12
- data/bin/MANIFEST +1 -0
- data/bin/stanford/bridge.jar +0 -0
- data/bin/stanford/joda-time.jar +0 -0
- data/bin/stanford/stanford-corenlp.jar +0 -0
- data/bin/stanford/stanford-parser.jar +0 -0
- data/bin/stanford/xom.jar +0 -0
- data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
- data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
- data/files/{INFO → MANIFEST} +0 -0
- data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
- data/files/weather-central-canada-heat-wave.html +1370 -0
- data/lib/treat/config/core/acronyms.rb +4 -0
- data/lib/treat/config/core/encodings.rb +8 -0
- data/lib/treat/config/core/entities.rb +2 -0
- data/lib/treat/config/core/language.rb +3 -0
- data/lib/treat/config/core/paths.rb +8 -0
- data/lib/treat/config/core/syntax.rb +1 -0
- data/lib/treat/config/core/verbosity.rb +1 -0
- data/lib/treat/config/databases/mongo.rb +3 -0
- data/lib/treat/config/languages/agnostic.rb +34 -0
- data/lib/treat/config/languages/arabic.rb +13 -0
- data/lib/treat/config/languages/chinese.rb +13 -0
- data/lib/treat/config/languages/dutch.rb +12 -0
- data/lib/treat/config/languages/english.rb +60 -0
- data/lib/treat/config/languages/french.rb +18 -0
- data/lib/treat/config/languages/german.rb +18 -0
- data/lib/treat/config/languages/greek.rb +12 -0
- data/lib/treat/config/languages/italian.rb +12 -0
- data/lib/treat/config/languages/polish.rb +12 -0
- data/lib/treat/config/languages/portuguese.rb +12 -0
- data/lib/treat/config/languages/russian.rb +12 -0
- data/lib/treat/config/languages/spanish.rb +12 -0
- data/lib/treat/config/languages/swedish.rb +12 -0
- data/lib/treat/config/libraries/stanford.rb +1 -0
- data/lib/treat/config/linguistics/categories.rb +4 -0
- data/lib/treat/config/linguistics/punctuation.rb +33 -0
- data/lib/treat/config/tags/aligned.rb +221 -0
- data/lib/treat/config/tags/enju.rb +71 -0
- data/lib/treat/config/tags/paris7.rb +17 -0
- data/lib/treat/config/tags/ptb.rb +15 -0
- data/lib/treat/config/workers/extractors.rb +39 -0
- data/lib/treat/config/workers/formatters.rb +20 -0
- data/lib/treat/config/workers/inflectors.rb +27 -0
- data/lib/treat/config/workers/learners.rb +6 -0
- data/lib/treat/config/workers/lexicalizers.rb +18 -0
- data/lib/treat/config/workers/list.rb +1 -0
- data/lib/treat/config/workers/processors.rb +19 -0
- data/lib/treat/config/workers/retrievers.rb +12 -0
- data/lib/treat/config.rb +125 -0
- data/lib/treat/{classification.rb → core/classification.rb} +1 -1
- data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
- data/lib/treat/{tree.rb → core/node.rb} +5 -5
- data/lib/treat/core/server.rb +3 -0
- data/lib/treat/core.rb +5 -0
- data/lib/treat/entities/abilities/buildable.rb +61 -56
- data/lib/treat/entities/abilities/checkable.rb +2 -2
- data/lib/treat/entities/abilities/comparable.rb +21 -0
- data/lib/treat/entities/abilities/copyable.rb +2 -0
- data/lib/treat/entities/abilities/countable.rb +1 -1
- data/lib/treat/entities/abilities/debuggable.rb +1 -1
- data/lib/treat/entities/abilities/delegatable.rb +42 -36
- data/lib/treat/entities/abilities/doable.rb +2 -2
- data/lib/treat/entities/abilities/exportable.rb +1 -1
- data/lib/treat/entities/abilities/iterable.rb +21 -33
- data/lib/treat/entities/abilities/magical.rb +8 -8
- data/lib/treat/entities/abilities/registrable.rb +0 -38
- data/lib/treat/entities/abilities/stringable.rb +19 -19
- data/lib/treat/entities/collection.rb +31 -0
- data/lib/treat/entities/document.rb +10 -0
- data/lib/treat/entities/entity.rb +18 -13
- data/lib/treat/entities/group.rb +15 -0
- data/lib/treat/entities/section.rb +13 -0
- data/lib/treat/entities/token.rb +35 -0
- data/lib/treat/entities/zone.rb +11 -0
- data/lib/treat/entities.rb +5 -75
- data/lib/treat/helpers/didyoumean.rb +57 -0
- data/lib/treat/helpers/escaping.rb +15 -0
- data/lib/treat/helpers/formatting.rb +41 -0
- data/lib/treat/helpers/platform.rb +15 -0
- data/lib/treat/helpers/reflection.rb +17 -0
- data/lib/treat/helpers/temporary.rb +27 -0
- data/lib/treat/helpers/verbosity.rb +19 -0
- data/lib/treat/helpers.rb +5 -0
- data/lib/treat/installer.rb +46 -165
- data/lib/treat/loaders/linguistics.rb +22 -27
- data/lib/treat/loaders/stanford.rb +23 -41
- data/lib/treat/loaders.rb +10 -0
- data/lib/treat/proxies.rb +73 -24
- data/lib/treat/version.rb +3 -0
- data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
- data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
- data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
- data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
- data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
- data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
- data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
- data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
- data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
- data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
- data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
- data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
- data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
- data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
- data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
- data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
- data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
- data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
- data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
- data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
- data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
- data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
- data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
- data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
- data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
- data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
- data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
- data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
- data/lib/treat/workers.rb +96 -0
- data/lib/treat.rb +23 -49
- data/spec/collection.rb +4 -4
- data/spec/document.rb +5 -5
- data/spec/entity.rb +33 -32
- data/spec/{tree.rb → node.rb} +5 -5
- data/spec/phrase.rb +5 -39
- data/spec/sandbox.rb +212 -6
- data/spec/token.rb +12 -9
- data/spec/treat.rb +12 -9
- data/spec/word.rb +10 -9
- data/spec/zone.rb +6 -2
- data/tmp/{INFO → MANIFEST} +0 -0
- data/tmp/english.yaml +10340 -0
- metadata +149 -139
- data/lib/treat/ai.rb +0 -12
- data/lib/treat/categories.rb +0 -90
- data/lib/treat/categorizable.rb +0 -44
- data/lib/treat/configurable.rb +0 -115
- data/lib/treat/dependencies.rb +0 -25
- data/lib/treat/downloader.rb +0 -87
- data/lib/treat/entities/abilities.rb +0 -10
- data/lib/treat/entities/entities.rb +0 -102
- data/lib/treat/exception.rb +0 -7
- data/lib/treat/extractors.rb +0 -79
- data/lib/treat/formatters/serializers/mongo.rb +0 -64
- data/lib/treat/formatters.rb +0 -41
- data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
- data/lib/treat/inflectors.rb +0 -52
- data/lib/treat/kernel.rb +0 -208
- data/lib/treat/languages/arabic.rb +0 -16
- data/lib/treat/languages/chinese.rb +0 -16
- data/lib/treat/languages/dutch.rb +0 -16
- data/lib/treat/languages/english.rb +0 -63
- data/lib/treat/languages/french.rb +0 -20
- data/lib/treat/languages/german.rb +0 -20
- data/lib/treat/languages/greek.rb +0 -16
- data/lib/treat/languages/italian.rb +0 -17
- data/lib/treat/languages/language.rb +0 -10
- data/lib/treat/languages/list.txt +0 -504
- data/lib/treat/languages/polish.rb +0 -16
- data/lib/treat/languages/portuguese.rb +0 -16
- data/lib/treat/languages/russian.rb +0 -16
- data/lib/treat/languages/spanish.rb +0 -16
- data/lib/treat/languages/swedish.rb +0 -16
- data/lib/treat/languages.rb +0 -132
- data/lib/treat/lexicalizers.rb +0 -37
- data/lib/treat/object.rb +0 -7
- data/lib/treat/processors/chunkers/autoselect.rb +0 -16
- data/lib/treat/processors/chunkers/txt.rb +0 -21
- data/lib/treat/processors.rb +0 -38
- data/lib/treat/retrievers.rb +0 -27
- data/lib/treat/server.rb +0 -26
- data/lib/treat/universalisation/encodings.rb +0 -12
- data/lib/treat/universalisation/tags.rb +0 -453
- data/lib/treat/universalisation.rb +0 -9
- data/spec/languages.rb +0 -25
data/spec/zone.rb
CHANGED
@@ -4,15 +4,19 @@ describe Treat::Entities::Zone do
 
 describe "Processable" do
 
+before do
+@processors = Treat.languages.
+english[:workers][:processors]
+end
 describe "#segment" do
 
 it "splits a zone into phrases/sentences and adds them as children of the zone" do
-
+@processors[:segmenters].each do |s|
 paragraph = Treat::Entities::Paragraph.new(
 "This is a first sentence inside the first paragraph. " +
 "This is the second sentence that is inside the paragraph.")
 paragraph.segment(s)
-paragraph.children.should eql paragraph.
+paragraph.children.should eql paragraph.sentences
 paragraph.phrases.map { |t| t.to_s }.should
 eql ["This is a first sentence inside the first paragraph.",
 "This is the second sentence that is inside the paragraph."]
data/tmp/{INFO → MANIFEST}
RENAMED
File without changes
|