summarize 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/README.markdown +25 -0
- data/Rakefile +5 -0
- data/ext/summarize/extconf.rb +0 -3
- data/ext/summarize/summarize.c +17 -5
- data/ext/summarize/summarize.h +1 -1
- data/lib/summarize.rb +17 -8
- data/summarize.gemspec +1 -1
- metadata +3 -3
data/.gitignore
CHANGED
data/README.markdown
CHANGED
@@ -33,6 +33,13 @@ You can also use custom stemming rules
|
|
33
33
|
|
34
34
|
"text to summarize".summarize(:dictionary => 'path/to/custom/dictionary')
|
35
35
|
|
36
|
+
Topics can also be returned
|
37
|
+
|
38
|
+
# Returns an array with content first and topics second
|
39
|
+
content, topics = "text to summarize".summarize(:topics => true)
|
40
|
+
# content => summarized text
|
41
|
+
# topics => Comma-delimited string of topics
|
42
|
+
|
36
43
|
## Dependencies
|
37
44
|
|
38
45
|
You must have glib-2.0 and libxml-2.0 installed and properly configured.
|
@@ -42,3 +49,21 @@ You must have glib-2.0 and libxml-2.0 installed and properly configured.
|
|
42
49
|
Gem written by Sean Soper ([@ssoper](http://twitter.com/ssoper))
|
43
50
|
|
44
51
|
The Open Text Summarizer library was written by Nadav Rotem and can be found at <http://libots.sourceforge.net/>
|
52
|
+
|
53
|
+
## License
|
54
|
+
|
55
|
+
Copyright (C) 2010 Sean Soper <sean.soper@gmail.com>
|
56
|
+
|
57
|
+
This program is free software; you can redistribute it and/or modify
|
58
|
+
it under the terms of the GNU General Public License as published by
|
59
|
+
the Free Software Foundation; either version 2 of the License, or
|
60
|
+
(at your option) any later version.
|
61
|
+
|
62
|
+
This program is distributed in the hope that it will be useful,
|
63
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
64
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
65
|
+
GNU General Public License for more details.
|
66
|
+
|
67
|
+
You should have received a copy of the GNU General Public License
|
68
|
+
along with this program; if not, write to the Free Software
|
69
|
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
data/Rakefile
CHANGED
@@ -44,6 +44,11 @@ task :test => :build do
|
|
44
44
|
content.summarize(:dictionary => '/path/to/nowhere')
|
45
45
|
end
|
46
46
|
end
|
47
|
+
|
48
|
+
def test_topics
|
49
|
+
content, topics = @jupiter.split('||').first.summarize(:topics => true)
|
50
|
+
assert_equal topics, "jupiter,planet,moon,system,mass"
|
51
|
+
end
|
47
52
|
end
|
48
53
|
|
49
54
|
end
|
data/ext/summarize/extconf.rb
CHANGED
data/ext/summarize/summarize.c
CHANGED
@@ -12,18 +12,22 @@
|
|
12
12
|
|
13
13
|
void Init_summarize() {
|
14
14
|
VALUE rb_mOts = rb_define_module("Summarize");
|
15
|
-
rb_define_module_function(rb_mOts, "summarize", summarize, 3);
|
15
|
+
rb_define_module_function(rb_mOts, "summarize", summarize, 4);
|
16
16
|
}
|
17
17
|
|
18
|
-
static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio) {
|
18
|
+
static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio, const VALUE rb_topics) {
|
19
19
|
long int length = RSTRING_LEN(rb_str);
|
20
20
|
char *text = StringValuePtr(rb_str);
|
21
21
|
char *dictionary_file = StringValuePtr(rb_dict_file);
|
22
22
|
int ratio = NUM2INT(rb_ratio);
|
23
|
-
|
23
|
+
|
24
24
|
size_t result_len;
|
25
25
|
OtsArticle *doc = ots_new_article();
|
26
26
|
|
27
|
+
VALUE summary;
|
28
|
+
VALUE topics;
|
29
|
+
VALUE result;
|
30
|
+
|
27
31
|
if (!ots_load_xml_dictionary(doc, dictionary_file)) {
|
28
32
|
ots_free_article(doc);
|
29
33
|
rb_raise(rb_eRuntimeError, "Cannot load dictionary file");
|
@@ -34,9 +38,17 @@ static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE r
|
|
34
38
|
ots_grade_doc(doc);
|
35
39
|
ots_highlight_doc(doc, ratio);
|
36
40
|
|
37
|
-
|
41
|
+
summary = rb_str_new2(ots_get_doc_text(doc, &result_len));
|
42
|
+
topics = rb_str_new2((const char *)doc->title);
|
38
43
|
|
39
44
|
ots_free_article(doc);
|
40
45
|
|
41
|
-
|
46
|
+
if (rb_topics == Qtrue) {
|
47
|
+
result = rb_ary_new();
|
48
|
+
rb_ary_push(result, summary);
|
49
|
+
rb_ary_push(result, topics);
|
50
|
+
return result;
|
51
|
+
} else {
|
52
|
+
return summary;
|
53
|
+
}
|
42
54
|
}
|
data/ext/summarize/summarize.h
CHANGED
data/lib/summarize.rb
CHANGED
@@ -10,7 +10,7 @@ class Hash #:nodoc:
|
|
10
10
|
end unless {}.respond_to? 'symbolize_keys'
|
11
11
|
|
12
12
|
module Summarize
|
13
|
-
VERSION = "1.0.2"
|
13
|
+
VERSION = "1.0.3"
|
14
14
|
|
15
15
|
LANGUAGES = [
|
16
16
|
'bg', # Bulgarian
|
@@ -54,8 +54,9 @@ module Summarize
|
|
54
54
|
|
55
55
|
def self.parse_options(options = {}) #:nodoc:
|
56
56
|
default_options = {
|
57
|
-
:ratio => 25,
|
58
|
-
:language => 'en' # ISO 639-1 code
|
57
|
+
:ratio => 25, # percentage
|
58
|
+
:language => 'en', # ISO 639-1 code
|
59
|
+
:topics => false
|
59
60
|
}
|
60
61
|
|
61
62
|
options = default_options.merge(options.symbolize_keys)
|
@@ -67,7 +68,7 @@ module Summarize
|
|
67
68
|
dict_file = File.join(File.expand_path(File.dirname(__FILE__)), "../ext/summarize/dic/#{options[:language]}")
|
68
69
|
end
|
69
70
|
|
70
|
-
return [dict_file, options[:ratio]]
|
71
|
+
return [dict_file, options[:ratio], options[:topics]]
|
71
72
|
end
|
72
73
|
|
73
74
|
end
|
@@ -87,12 +88,16 @@ class String
|
|
87
88
|
# dictionary::
|
88
89
|
# A path to a custom stemming XML file
|
89
90
|
#
|
91
|
+
# topics::
|
92
|
+
# A boolean indicating whether to return topics as well. Return value will be
|
93
|
+
# an array instead with content first and topics second. Default is false.
|
94
|
+
#
|
90
95
|
# == Returns:
|
91
|
-
# A string summary
|
96
|
+
# A string summary OR an array of content and topics
|
92
97
|
#
|
93
98
|
def summarize(options = {})
|
94
|
-
dict_file, ratio = Summarize.parse_options(options)
|
95
|
-
String.send(:summarize, self, dict_file, ratio)
|
99
|
+
dict_file, ratio, topics = Summarize.parse_options(options)
|
100
|
+
String.send(:summarize, self, dict_file, ratio, topics)
|
96
101
|
end
|
97
102
|
|
98
103
|
end
|
@@ -111,8 +116,12 @@ class File
|
|
111
116
|
# dictionary::
|
112
117
|
# A path to a custom stemming XML file
|
113
118
|
#
|
119
|
+
# topics::
|
120
|
+
# A boolean indicating whether to return topics as well. Return value will be
|
121
|
+
# an array instead with content first and topics second. Default is false.
|
122
|
+
#
|
114
123
|
# == Returns:
|
115
|
-
# A string summary
|
124
|
+
# A string summary OR an array of content and topics
|
116
125
|
#
|
117
126
|
def summarize(options = {})
|
118
127
|
self.read.summarize(options)
|
data/summarize.gemspec
CHANGED
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 1.0.2
|
8
|
+
- 3
|
9
|
+
version: 1.0.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Sean Soper
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-12-06 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|