ots 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/README.md +80 -0
  2. data/dictionaries/bg.xml +101 -0
  3. data/dictionaries/ca.xml +141 -0
  4. data/dictionaries/cs.xml +161 -0
  5. data/dictionaries/cy.xml +118 -0
  6. data/dictionaries/da.xml +129 -0
  7. data/dictionaries/de.xml +354 -0
  8. data/dictionaries/el.xml +80 -0
  9. data/dictionaries/en.xml +606 -0
  10. data/dictionaries/eo.xml +171 -0
  11. data/dictionaries/es.xml +369 -0
  12. data/dictionaries/et.xml +172 -0
  13. data/dictionaries/eu.xml +77 -0
  14. data/dictionaries/fi.xml +105 -0
  15. data/dictionaries/fr.xml +199 -0
  16. data/dictionaries/ga.xml +124 -0
  17. data/dictionaries/gl.xml +290 -0
  18. data/dictionaries/he.xml +334 -0
  19. data/dictionaries/hu.xml +280 -0
  20. data/dictionaries/ia.xml +97 -0
  21. data/dictionaries/id.xml +75 -0
  22. data/dictionaries/is.xml +201 -0
  23. data/dictionaries/it.xml +206 -0
  24. data/dictionaries/lv.xml +77 -0
  25. data/dictionaries/mi.xml +76 -0
  26. data/dictionaries/ms.xml +160 -0
  27. data/dictionaries/mt.xml +73 -0
  28. data/dictionaries/nl.xml +245 -0
  29. data/dictionaries/nn.xml +264 -0
  30. data/dictionaries/pl.xml +92 -0
  31. data/dictionaries/pt.xml +365 -0
  32. data/dictionaries/ro.xml +163 -0
  33. data/dictionaries/ru.xml +150 -0
  34. data/dictionaries/sv.xml +255 -0
  35. data/dictionaries/tl.xml +67 -0
  36. data/dictionaries/tr.xml +65 -0
  37. data/dictionaries/uk.xml +98 -0
  38. data/dictionaries/yi.xml +293 -0
  39. data/ext/article.c +119 -0
  40. data/ext/dictionary.c +335 -0
  41. data/ext/extconf.rb +13 -14
  42. data/ext/grader-tc.c +185 -0
  43. data/ext/grader-tc.h +64 -0
  44. data/ext/grader-tf.c +116 -0
  45. data/ext/grader.c +85 -0
  46. data/ext/highlighter.c +128 -0
  47. data/ext/html.c +131 -0
  48. data/ext/libots.h +158 -0
  49. data/ext/ots.c +130 -151
  50. data/ext/ots.h +15 -0
  51. data/ext/parser.c +173 -0
  52. data/ext/relations.c +163 -0
  53. data/ext/stemmer.c +332 -0
  54. data/ext/text.c +98 -0
  55. data/ext/version.h +2 -0
  56. data/ext/wordlist.c +220 -0
  57. data/test/helper.rb +3 -0
  58. data/test/test_article.rb +52 -0
  59. data/test/test_ots.rb +23 -0
  60. metadata +122 -38
  61. data/README +0 -25
  62. data/VERSION +0 -1
  63. data/lib/ots.rb +0 -1
  64. data/test/ots_test.rb +0 -62
data/README DELETED
@@ -1,25 +0,0 @@
1
- ots is an interface to libots - The open text summarizer
2
-
3
- INSTALL:
4
-
5
- sudo gem install ots --source http://gems.github.com
6
-
7
- REQUIREMENT:
8
-
9
- * Ruby >= 1.8.7 ( >= 1.9.1 recommended)
10
- * rubygems >= 1.3.5
11
- * ruby development libraries (debian: ruby1.8-dev, ruby1.9.1-dev)
12
- * libxml2 development libraries (debian: libxml2-dev)
13
- * libots development libraries (debian: libots-dev)
14
- * glib2.0 development libraries (debian: libglib2.0-dev)
15
-
16
- USAGE:
17
-
18
- >> require "rubygems"
19
- >> require "ots"
20
- >> summarizer = ots.new
21
- >> summarizer.parse("I think I need some ice cream to cool me off. It is too hot down under")
22
- >> summarizer.title
23
- => [ "hot","cool","cream","ice","think" ]
24
- >> summarizer.summarize(:lines => 1)
25
- => [ { :sentence => "I think I need some ice cream to cool me off", :score => 57 } ]
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.4.3
data/lib/ots.rb DELETED
@@ -1 +0,0 @@
1
- require File.join(File.dirname(__FILE__), %w(.. ext ots))
data/test/ots_test.rb DELETED
@@ -1,62 +0,0 @@
1
- require 'helper'
2
-
3
- class OTSTest < Test::Unit::TestCase
4
-
5
- SAMPLE = <<-TEXT
6
- The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.
7
- It is the only species in its genus. The species has a worldwide distribution, with Atlantic and
8
- Pacific subspecies.
9
- TEXT
10
-
11
- context 'Title' do
12
- should 'extract title from given document' do
13
- ots = OTS.new
14
- ots.parse SAMPLE
15
- assert_equal 'species,turtle,subspecies,pacific,atlantic', ots.title
16
- end
17
- end
18
-
19
- context 'Keywords' do
20
- should 'extract keywords from given document' do
21
- ots = OTS.new
22
- ots.parse SAMPLE
23
- assert_equal %W(
24
- species turtle subspecies pacific atlantic distribution worldwide genus cheloniidae family
25
- belonging sea endangered critically hawksbill
26
- ), ots.keywords
27
- end
28
- end
29
-
30
- context 'Summary' do
31
- should 'extract keywords from given document' do
32
- ots = OTS.new
33
- ots.parse SAMPLE
34
- lines = ots.summarize(:lines => 2).map do |value|
35
- { :sentence => value[:sentence].gsub(/\n\s*/, ' ').strip, :score => value[:score] }
36
- end
37
-
38
- assert_equal [
39
- {
40
- :sentence => "The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.",
41
- :score => 48
42
- },
43
- {
44
- :sentence => "The species has a worldwide distribution, with Atlantic and Pacific subspecies.",
45
- :score => 20
46
- }
47
- ], lines
48
-
49
- end
50
-
51
- should 'utf8 encode strings properly' do
52
- ots = OTS.new
53
- text = "The hawksbill turtle\xE2\x80\x93is critically endangered."
54
- text.force_encoding('UTF-8') if RUBY_VERSION >= "1.9"
55
-
56
- ots.parse(text)
57
- summary = ots.summarize(:lines => 1).first[:sentence]
58
- assert_equal text, summary
59
- end
60
- end
61
-
62
- end