ots 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +80 -0
- data/dictionaries/bg.xml +101 -0
- data/dictionaries/ca.xml +141 -0
- data/dictionaries/cs.xml +161 -0
- data/dictionaries/cy.xml +118 -0
- data/dictionaries/da.xml +129 -0
- data/dictionaries/de.xml +354 -0
- data/dictionaries/el.xml +80 -0
- data/dictionaries/en.xml +606 -0
- data/dictionaries/eo.xml +171 -0
- data/dictionaries/es.xml +369 -0
- data/dictionaries/et.xml +172 -0
- data/dictionaries/eu.xml +77 -0
- data/dictionaries/fi.xml +105 -0
- data/dictionaries/fr.xml +199 -0
- data/dictionaries/ga.xml +124 -0
- data/dictionaries/gl.xml +290 -0
- data/dictionaries/he.xml +334 -0
- data/dictionaries/hu.xml +280 -0
- data/dictionaries/ia.xml +97 -0
- data/dictionaries/id.xml +75 -0
- data/dictionaries/is.xml +201 -0
- data/dictionaries/it.xml +206 -0
- data/dictionaries/lv.xml +77 -0
- data/dictionaries/mi.xml +76 -0
- data/dictionaries/ms.xml +160 -0
- data/dictionaries/mt.xml +73 -0
- data/dictionaries/nl.xml +245 -0
- data/dictionaries/nn.xml +264 -0
- data/dictionaries/pl.xml +92 -0
- data/dictionaries/pt.xml +365 -0
- data/dictionaries/ro.xml +163 -0
- data/dictionaries/ru.xml +150 -0
- data/dictionaries/sv.xml +255 -0
- data/dictionaries/tl.xml +67 -0
- data/dictionaries/tr.xml +65 -0
- data/dictionaries/uk.xml +98 -0
- data/dictionaries/yi.xml +293 -0
- data/ext/article.c +119 -0
- data/ext/dictionary.c +335 -0
- data/ext/extconf.rb +13 -14
- data/ext/grader-tc.c +185 -0
- data/ext/grader-tc.h +64 -0
- data/ext/grader-tf.c +116 -0
- data/ext/grader.c +85 -0
- data/ext/highlighter.c +128 -0
- data/ext/html.c +131 -0
- data/ext/libots.h +158 -0
- data/ext/ots.c +130 -151
- data/ext/ots.h +15 -0
- data/ext/parser.c +173 -0
- data/ext/relations.c +163 -0
- data/ext/stemmer.c +332 -0
- data/ext/text.c +98 -0
- data/ext/version.h +2 -0
- data/ext/wordlist.c +220 -0
- data/test/helper.rb +3 -0
- data/test/test_article.rb +52 -0
- data/test/test_ots.rb +23 -0
- metadata +122 -38
- data/README +0 -25
- data/VERSION +0 -1
- data/lib/ots.rb +0 -1
- data/test/ots_test.rb +0 -62
data/README
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
ots is an interface to libots - The open text summarizer
|
2
|
-
|
3
|
-
INSTALL:
|
4
|
-
|
5
|
-
sudo gem install ots --source http://gems.github.com
|
6
|
-
|
7
|
-
REQUIREMENT:
|
8
|
-
|
9
|
-
* Ruby >= 1.8.7 ( >= 1.9.1 recommended)
|
10
|
-
* rubygems >= 1.3.5
|
11
|
-
* ruby development libraries (debian: ruby1.8-dev, ruby1.9.1-dev)
|
12
|
-
* libxml2 development libraries (debian: libxml2-dev)
|
13
|
-
* libots development libraries (debian: libots-dev)
|
14
|
-
* glib2.0 development libraries (debian: libglib2.0-dev)
|
15
|
-
|
16
|
-
USAGE:
|
17
|
-
|
18
|
-
>> require "rubygems"
|
19
|
-
>> require "ots"
|
20
|
-
>> summarizer = ots.new
|
21
|
-
>> summarizer.parse("I think I need some ice cream to cool me off. It is too hot down under")
|
22
|
-
>> summarizer.title
|
23
|
-
=> [ "hot","cool","cream","ice","think" ]
|
24
|
-
>> summarizer.summarize(:lines => 1)
|
25
|
-
=> [ { :sentence => "I think I need some ice cream to cool me off", :score => 57 } ]
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.4.3
|
data/lib/ots.rb
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), %w(.. ext ots))
|
data/test/ots_test.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
class OTSTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
SAMPLE = <<-TEXT
|
6
|
-
The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.
|
7
|
-
It is the only species in its genus. The species has a worldwide distribution, with Atlantic and
|
8
|
-
Pacific subspecies.
|
9
|
-
TEXT
|
10
|
-
|
11
|
-
context 'Title' do
|
12
|
-
should 'extract title from given document' do
|
13
|
-
ots = OTS.new
|
14
|
-
ots.parse SAMPLE
|
15
|
-
assert_equal 'species,turtle,subspecies,pacific,atlantic', ots.title
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context 'Keywords' do
|
20
|
-
should 'extract keywords from given document' do
|
21
|
-
ots = OTS.new
|
22
|
-
ots.parse SAMPLE
|
23
|
-
assert_equal %W(
|
24
|
-
species turtle subspecies pacific atlantic distribution worldwide genus cheloniidae family
|
25
|
-
belonging sea endangered critically hawksbill
|
26
|
-
), ots.keywords
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
context 'Summary' do
|
31
|
-
should 'extract keywords from given document' do
|
32
|
-
ots = OTS.new
|
33
|
-
ots.parse SAMPLE
|
34
|
-
lines = ots.summarize(:lines => 2).map do |value|
|
35
|
-
{ :sentence => value[:sentence].gsub(/\n\s*/, ' ').strip, :score => value[:score] }
|
36
|
-
end
|
37
|
-
|
38
|
-
assert_equal [
|
39
|
-
{
|
40
|
-
:sentence => "The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.",
|
41
|
-
:score => 48
|
42
|
-
},
|
43
|
-
{
|
44
|
-
:sentence => "The species has a worldwide distribution, with Atlantic and Pacific subspecies.",
|
45
|
-
:score => 20
|
46
|
-
}
|
47
|
-
], lines
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
should 'utf8 encode strings properly' do
|
52
|
-
ots = OTS.new
|
53
|
-
text = "The hawksbill turtle\xE2\x80\x93is critically endangered."
|
54
|
-
text.force_encoding('UTF-8') if RUBY_VERSION >= "1.9"
|
55
|
-
|
56
|
-
ots.parse(text)
|
57
|
-
summary = ots.summarize(:lines => 1).first[:sentence]
|
58
|
-
assert_equal text, summary
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|