ots 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +80 -0
- data/dictionaries/bg.xml +101 -0
- data/dictionaries/ca.xml +141 -0
- data/dictionaries/cs.xml +161 -0
- data/dictionaries/cy.xml +118 -0
- data/dictionaries/da.xml +129 -0
- data/dictionaries/de.xml +354 -0
- data/dictionaries/el.xml +80 -0
- data/dictionaries/en.xml +606 -0
- data/dictionaries/eo.xml +171 -0
- data/dictionaries/es.xml +369 -0
- data/dictionaries/et.xml +172 -0
- data/dictionaries/eu.xml +77 -0
- data/dictionaries/fi.xml +105 -0
- data/dictionaries/fr.xml +199 -0
- data/dictionaries/ga.xml +124 -0
- data/dictionaries/gl.xml +290 -0
- data/dictionaries/he.xml +334 -0
- data/dictionaries/hu.xml +280 -0
- data/dictionaries/ia.xml +97 -0
- data/dictionaries/id.xml +75 -0
- data/dictionaries/is.xml +201 -0
- data/dictionaries/it.xml +206 -0
- data/dictionaries/lv.xml +77 -0
- data/dictionaries/mi.xml +76 -0
- data/dictionaries/ms.xml +160 -0
- data/dictionaries/mt.xml +73 -0
- data/dictionaries/nl.xml +245 -0
- data/dictionaries/nn.xml +264 -0
- data/dictionaries/pl.xml +92 -0
- data/dictionaries/pt.xml +365 -0
- data/dictionaries/ro.xml +163 -0
- data/dictionaries/ru.xml +150 -0
- data/dictionaries/sv.xml +255 -0
- data/dictionaries/tl.xml +67 -0
- data/dictionaries/tr.xml +65 -0
- data/dictionaries/uk.xml +98 -0
- data/dictionaries/yi.xml +293 -0
- data/ext/article.c +119 -0
- data/ext/dictionary.c +335 -0
- data/ext/extconf.rb +13 -14
- data/ext/grader-tc.c +185 -0
- data/ext/grader-tc.h +64 -0
- data/ext/grader-tf.c +116 -0
- data/ext/grader.c +85 -0
- data/ext/highlighter.c +128 -0
- data/ext/html.c +131 -0
- data/ext/libots.h +158 -0
- data/ext/ots.c +130 -151
- data/ext/ots.h +15 -0
- data/ext/parser.c +173 -0
- data/ext/relations.c +163 -0
- data/ext/stemmer.c +332 -0
- data/ext/text.c +98 -0
- data/ext/version.h +2 -0
- data/ext/wordlist.c +220 -0
- data/test/helper.rb +3 -0
- data/test/test_article.rb +52 -0
- data/test/test_ots.rb +23 -0
- metadata +122 -38
- data/README +0 -25
- data/VERSION +0 -1
- data/lib/ots.rb +0 -1
- data/test/ots_test.rb +0 -62
data/README
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
ots is an interface to libots - The open text summarizer
|
2
|
-
|
3
|
-
INSTALL:
|
4
|
-
|
5
|
-
sudo gem install ots --source http://gems.github.com
|
6
|
-
|
7
|
-
REQUIREMENT:
|
8
|
-
|
9
|
-
* Ruby >= 1.8.7 ( >= 1.9.1 recommended)
|
10
|
-
* rubygems >= 1.3.5
|
11
|
-
* ruby development libraries (debian: ruby1.8-dev, ruby1.9.1-dev)
|
12
|
-
* libxml2 development libraries (debian: libxml2-dev)
|
13
|
-
* libots development libraries (debian: libots-dev)
|
14
|
-
* glib2.0 development libraries (debian: libglib2.0-dev)
|
15
|
-
|
16
|
-
USAGE:
|
17
|
-
|
18
|
-
>> require "rubygems"
|
19
|
-
>> require "ots"
|
20
|
-
>> summarizer = OTS.new
|
21
|
-
>> summarizer.parse("I think I need some ice cream to cool me off. It is too hot down under")
|
22
|
-
>> summarizer.title
|
23
|
-
=> [ "hot","cool","cream","ice","think" ]
|
24
|
-
>> summarizer.summarize(:lines => 1)
|
25
|
-
=> [ { :sentence => "I think I need some ice cream to cool me off", :score => 57 } ]
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.4.3
|
data/lib/ots.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), %w(.. ext ots))
|
data/test/ots_test.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
class OTSTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
SAMPLE = <<-TEXT
|
6
|
-
The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.
|
7
|
-
It is the only species in its genus. The species has a worldwide distribution, with Atlantic and
|
8
|
-
Pacific subspecies.
|
9
|
-
TEXT
|
10
|
-
|
11
|
-
context 'Title' do
|
12
|
-
should 'extract title from given document' do
|
13
|
-
ots = OTS.new
|
14
|
-
ots.parse SAMPLE
|
15
|
-
assert_equal 'species,turtle,subspecies,pacific,atlantic', ots.title
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context 'Keywords' do
|
20
|
-
should 'extract keywords from given document' do
|
21
|
-
ots = OTS.new
|
22
|
-
ots.parse SAMPLE
|
23
|
-
assert_equal %W(
|
24
|
-
species turtle subspecies pacific atlantic distribution worldwide genus cheloniidae family
|
25
|
-
belonging sea endangered critically hawksbill
|
26
|
-
), ots.keywords
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
context 'Summary' do
|
31
|
-
should 'extract keywords from given document' do
|
32
|
-
ots = OTS.new
|
33
|
-
ots.parse SAMPLE
|
34
|
-
lines = ots.summarize(:lines => 2).map do |value|
|
35
|
-
{ :sentence => value[:sentence].gsub(/\n\s*/, ' ').strip, :score => value[:score] }
|
36
|
-
end
|
37
|
-
|
38
|
-
assert_equal [
|
39
|
-
{
|
40
|
-
:sentence => "The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.",
|
41
|
-
:score => 48
|
42
|
-
},
|
43
|
-
{
|
44
|
-
:sentence => "The species has a worldwide distribution, with Atlantic and Pacific subspecies.",
|
45
|
-
:score => 20
|
46
|
-
}
|
47
|
-
], lines
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
should 'utf8 encode strings properly' do
|
52
|
-
ots = OTS.new
|
53
|
-
text = "The hawksbill turtle\xE2\x80\x93is critically endangered."
|
54
|
-
text.force_encoding('UTF-8') if RUBY_VERSION >= "1.9"
|
55
|
-
|
56
|
-
ots.parse(text)
|
57
|
-
summary = ots.summarize(:lines => 1).first[:sentence]
|
58
|
-
assert_equal text, summary
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|