summarize 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.markdown +25 -0
- data/Rakefile +5 -0
- data/ext/summarize/extconf.rb +0 -3
- data/ext/summarize/summarize.c +17 -5
- data/ext/summarize/summarize.h +1 -1
- data/lib/summarize.rb +17 -8
- data/summarize.gemspec +1 -1
- metadata +3 -3
data/.gitignore
CHANGED
data/README.markdown
CHANGED
@@ -33,6 +33,13 @@ You can also use custom stemming rules
|
|
33
33
|
|
34
34
|
"text to summarize".summarize(:dictionary => 'path/to/custom/dictionary')
|
35
35
|
|
36
|
+
Topics can also be returned
|
37
|
+
|
38
|
+
# Returns an array with content first and topics second
|
39
|
+
content, topics = "text to summarize".summarize(:topics => true)
|
40
|
+
# content => summarized text
|
41
|
+
# topics => Comma-delimited string of topics
|
42
|
+
|
36
43
|
## Dependencies
|
37
44
|
|
38
45
|
You must have glib-2.0 and libxml-2.0 installed and properly configured.
|
@@ -42,3 +49,21 @@ You must have glib-2.0 and libxml-2.0 installed and properly configured.
|
|
42
49
|
Gem written by Sean Soper ([@ssoper](http://twitter.com/ssoper))
|
43
50
|
|
44
51
|
The Open Text Summarizer library was written by Nadav Rotem and can be found at <http://libots.sourceforge.net/>
|
52
|
+
|
53
|
+
## License
|
54
|
+
|
55
|
+
Copyright (C) 2010 Sean Soper <sean.soper@gmail.com>
|
56
|
+
|
57
|
+
This program is free software; you can redistribute it and/or modify
|
58
|
+
it under the terms of the GNU General Public License as published by
|
59
|
+
the Free Software Foundation; either version 2 of the License, or
|
60
|
+
(at your option) any later version.
|
61
|
+
|
62
|
+
This program is distributed in the hope that it will be useful,
|
63
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
64
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
65
|
+
GNU Library General Public License for more details.
|
66
|
+
|
67
|
+
You should have received a copy of the GNU General Public License
|
68
|
+
along with this program; if not, write to the Free Software
|
69
|
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
data/Rakefile
CHANGED
@@ -44,6 +44,11 @@ task :test => :build do
|
|
44
44
|
content.summarize(:dictionary => '/path/to/nowhere')
|
45
45
|
end
|
46
46
|
end
|
47
|
+
|
48
|
+
def test_topics
|
49
|
+
content, topics = @jupiter.split('||').first.summarize(:topics => true)
|
50
|
+
assert_equal topics, "jupiter,planet,moon,system,mass"
|
51
|
+
end
|
47
52
|
end
|
48
53
|
|
49
54
|
end
|
data/ext/summarize/extconf.rb
CHANGED
data/ext/summarize/summarize.c
CHANGED
@@ -12,18 +12,22 @@
|
|
12
12
|
|
13
13
|
void Init_summarize() {
|
14
14
|
VALUE rb_mOts = rb_define_module("Summarize");
|
15
|
-
rb_define_module_function(rb_mOts, "summarize", summarize, 3);
|
15
|
+
rb_define_module_function(rb_mOts, "summarize", summarize, 4);
|
16
16
|
}
|
17
17
|
|
18
|
-
static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio) {
|
18
|
+
static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio, const VALUE rb_topics) {
|
19
19
|
long int length = RSTRING_LEN(rb_str);
|
20
20
|
char *text = StringValuePtr(rb_str);
|
21
21
|
char *dictionary_file = StringValuePtr(rb_dict_file);
|
22
22
|
int ratio = NUM2INT(rb_ratio);
|
23
|
-
|
23
|
+
|
24
24
|
size_t result_len;
|
25
25
|
OtsArticle *doc = ots_new_article();
|
26
26
|
|
27
|
+
VALUE summary;
|
28
|
+
VALUE topics;
|
29
|
+
VALUE result;
|
30
|
+
|
27
31
|
if (!ots_load_xml_dictionary(doc, dictionary_file)) {
|
28
32
|
ots_free_article(doc);
|
29
33
|
rb_raise(rb_eRuntimeError, "Cannot load dictionary file");
|
@@ -34,9 +38,17 @@ static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE r
|
|
34
38
|
ots_grade_doc(doc);
|
35
39
|
ots_highlight_doc(doc, ratio);
|
36
40
|
|
37
|
-
|
41
|
+
summary = rb_str_new2(ots_get_doc_text(doc, &result_len));
|
42
|
+
topics = rb_str_new2((const char *)doc->title);
|
38
43
|
|
39
44
|
ots_free_article(doc);
|
40
45
|
|
41
|
-
|
46
|
+
if (rb_topics == Qtrue) {
|
47
|
+
result = rb_ary_new();
|
48
|
+
rb_ary_push(result, summary);
|
49
|
+
rb_ary_push(result, topics);
|
50
|
+
return result;
|
51
|
+
} else {
|
52
|
+
return summary;
|
53
|
+
}
|
42
54
|
}
|
data/ext/summarize/summarize.h
CHANGED
data/lib/summarize.rb
CHANGED
@@ -10,7 +10,7 @@ class Hash #:nodoc:
|
|
10
10
|
end unless {}.respond_to? 'symbolize_keys'
|
11
11
|
|
12
12
|
module Summarize
|
13
|
-
VERSION = "1.0.2"
|
13
|
+
VERSION = "1.0.3"
|
14
14
|
|
15
15
|
LANGUAGES = [
|
16
16
|
'bg', # Bulgarian
|
@@ -54,8 +54,9 @@ module Summarize
|
|
54
54
|
|
55
55
|
def self.parse_options(options = {}) #:nodoc:
|
56
56
|
default_options = {
|
57
|
-
:ratio => 25,
|
58
|
-
:language => 'en' # ISO 639-1 code
|
57
|
+
:ratio => 25, # percentage
|
58
|
+
:language => 'en', # ISO 639-1 code
|
59
|
+
:topics => false
|
59
60
|
}
|
60
61
|
|
61
62
|
options = default_options.merge(options.symbolize_keys)
|
@@ -67,7 +68,7 @@ module Summarize
|
|
67
68
|
dict_file = File.join(File.expand_path(File.dirname(__FILE__)), "../ext/summarize/dic/#{options[:language]}")
|
68
69
|
end
|
69
70
|
|
70
|
-
return [dict_file, options[:ratio]]
|
71
|
+
return [dict_file, options[:ratio], options[:topics]]
|
71
72
|
end
|
72
73
|
|
73
74
|
end
|
@@ -87,12 +88,16 @@ class String
|
|
87
88
|
# dictionary::
|
88
89
|
# A path to a custom stemming XML file
|
89
90
|
#
|
91
|
+
# topics::
|
92
|
+
# A boolean indicating whether to return topics as well. Return value will be
|
93
|
+
# an array instead with content first and topics second. Default is false.
|
94
|
+
#
|
90
95
|
# == Returns:
|
91
|
-
# A string summary
|
96
|
+
# A string summary OR an array of content and topics
|
92
97
|
#
|
93
98
|
def summarize(options = {})
|
94
|
-
dict_file, ratio = Summarize.parse_options(options)
|
95
|
-
String.send(:summarize, self, dict_file, ratio)
|
99
|
+
dict_file, ratio, topics = Summarize.parse_options(options)
|
100
|
+
String.send(:summarize, self, dict_file, ratio, topics)
|
96
101
|
end
|
97
102
|
|
98
103
|
end
|
@@ -111,8 +116,12 @@ class File
|
|
111
116
|
# dictionary::
|
112
117
|
# A path to a custom stemming XML file
|
113
118
|
#
|
119
|
+
# topics::
|
120
|
+
# A boolean indicating whether to return topics as well. Return value will be
|
121
|
+
# an array instead with content first and topics second. Default is false.
|
122
|
+
#
|
114
123
|
# == Returns:
|
115
|
-
# A string summary
|
124
|
+
# A string summary OR an array of content and topics
|
116
125
|
#
|
117
126
|
def summarize(options = {})
|
118
127
|
self.read.summarize(options)
|
data/summarize.gemspec
CHANGED
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 1.0.2
|
8
|
+
- 3
|
9
|
+
version: 1.0.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Sean Soper
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-12-06 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|