ots 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. data/README.md +80 -0
  2. data/dictionaries/bg.xml +101 -0
  3. data/dictionaries/ca.xml +141 -0
  4. data/dictionaries/cs.xml +161 -0
  5. data/dictionaries/cy.xml +118 -0
  6. data/dictionaries/da.xml +129 -0
  7. data/dictionaries/de.xml +354 -0
  8. data/dictionaries/el.xml +80 -0
  9. data/dictionaries/en.xml +606 -0
  10. data/dictionaries/eo.xml +171 -0
  11. data/dictionaries/es.xml +369 -0
  12. data/dictionaries/et.xml +172 -0
  13. data/dictionaries/eu.xml +77 -0
  14. data/dictionaries/fi.xml +105 -0
  15. data/dictionaries/fr.xml +199 -0
  16. data/dictionaries/ga.xml +124 -0
  17. data/dictionaries/gl.xml +290 -0
  18. data/dictionaries/he.xml +334 -0
  19. data/dictionaries/hu.xml +280 -0
  20. data/dictionaries/ia.xml +97 -0
  21. data/dictionaries/id.xml +75 -0
  22. data/dictionaries/is.xml +201 -0
  23. data/dictionaries/it.xml +206 -0
  24. data/dictionaries/lv.xml +77 -0
  25. data/dictionaries/mi.xml +76 -0
  26. data/dictionaries/ms.xml +160 -0
  27. data/dictionaries/mt.xml +73 -0
  28. data/dictionaries/nl.xml +245 -0
  29. data/dictionaries/nn.xml +264 -0
  30. data/dictionaries/pl.xml +92 -0
  31. data/dictionaries/pt.xml +365 -0
  32. data/dictionaries/ro.xml +163 -0
  33. data/dictionaries/ru.xml +150 -0
  34. data/dictionaries/sv.xml +255 -0
  35. data/dictionaries/tl.xml +67 -0
  36. data/dictionaries/tr.xml +65 -0
  37. data/dictionaries/uk.xml +98 -0
  38. data/dictionaries/yi.xml +293 -0
  39. data/ext/article.c +119 -0
  40. data/ext/dictionary.c +335 -0
  41. data/ext/extconf.rb +13 -14
  42. data/ext/grader-tc.c +185 -0
  43. data/ext/grader-tc.h +64 -0
  44. data/ext/grader-tf.c +116 -0
  45. data/ext/grader.c +85 -0
  46. data/ext/highlighter.c +128 -0
  47. data/ext/html.c +131 -0
  48. data/ext/libots.h +158 -0
  49. data/ext/ots.c +130 -151
  50. data/ext/ots.h +15 -0
  51. data/ext/parser.c +173 -0
  52. data/ext/relations.c +163 -0
  53. data/ext/stemmer.c +332 -0
  54. data/ext/text.c +98 -0
  55. data/ext/version.h +2 -0
  56. data/ext/wordlist.c +220 -0
  57. data/test/helper.rb +3 -0
  58. data/test/test_article.rb +52 -0
  59. data/test/test_ots.rb +23 -0
  60. metadata +122 -38
  61. data/README +0 -25
  62. data/VERSION +0 -1
  63. data/lib/ots.rb +0 -1
  64. data/test/ots_test.rb +0 -62
data/README DELETED
@@ -1,25 +0,0 @@
1
- ots is an interface to libots - The open text summarizer
2
-
3
- INSTALL:
4
-
5
- sudo gem install ots --source http://gems.github.com
6
-
7
- REQUIREMENT:
8
-
9
- * Ruby >= 1.8.7 ( >= 1.9.1 recommended)
10
- * rubygems >= 1.3.5
11
- * ruby development libraries (debian: ruby1.8-dev, ruby1.9.1-dev)
12
- * libxml2 development libraries (debian: libxml2-dev)
13
- * libots development libraries (debian: libots-dev)
14
- * glib2.0 development libraries (debian: libglib2.0-dev)
15
-
16
- USAGE:
17
-
18
- >> require "rubygems"
19
- >> require "ots"
20
- >> summarizer = ots.new
21
- >> summarizer.parse("I think I need some ice cream to cool me off. It is too hot down under")
22
- >> summarizer.title
23
- => [ "hot","cool","cream","ice","think" ]
24
- >> summarizer.summarize(:lines => 1)
25
- => [ { :sentence => "I think I need some ice cream to cool me off", :score => 57 } ]
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.4.3
data/lib/ots.rb DELETED
@@ -1 +0,0 @@
1
- require File.join(File.dirname(__FILE__), %w(.. ext ots))
data/test/ots_test.rb DELETED
@@ -1,62 +0,0 @@
1
- require 'helper'
2
-
3
- class OTSTest < Test::Unit::TestCase
4
-
5
- SAMPLE = <<-TEXT
6
- The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.
7
- It is the only species in its genus. The species has a worldwide distribution, with Atlantic and
8
- Pacific subspecies.
9
- TEXT
10
-
11
- context 'Title' do
12
- should 'extract title from given document' do
13
- ots = OTS.new
14
- ots.parse SAMPLE
15
- assert_equal 'species,turtle,subspecies,pacific,atlantic', ots.title
16
- end
17
- end
18
-
19
- context 'Keywords' do
20
- should 'extract keywords from given document' do
21
- ots = OTS.new
22
- ots.parse SAMPLE
23
- assert_equal %W(
24
- species turtle subspecies pacific atlantic distribution worldwide genus cheloniidae family
25
- belonging sea endangered critically hawksbill
26
- ), ots.keywords
27
- end
28
- end
29
-
30
- context 'Summary' do
31
- should 'extract keywords from given document' do
32
- ots = OTS.new
33
- ots.parse SAMPLE
34
- lines = ots.summarize(:lines => 2).map do |value|
35
- { :sentence => value[:sentence].gsub(/\n\s*/, ' ').strip, :score => value[:score] }
36
- end
37
-
38
- assert_equal [
39
- {
40
- :sentence => "The hawksbill turtle is a critically endangered sea turtle belonging to the family Cheloniidae.",
41
- :score => 48
42
- },
43
- {
44
- :sentence => "The species has a worldwide distribution, with Atlantic and Pacific subspecies.",
45
- :score => 20
46
- }
47
- ], lines
48
-
49
- end
50
-
51
- should 'utf8 encode strings properly' do
52
- ots = OTS.new
53
- text = "The hawksbill turtle\xE2\x80\x93is critically endangered."
54
- text.force_encoding('UTF-8') if RUBY_VERSION >= "1.9"
55
-
56
- ots.parse(text)
57
- summary = ots.summarize(:lines => 1).first[:sentence]
58
- assert_equal text, summary
59
- end
60
- end
61
-
62
- end