summarize 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +2 -2
- data/ext/summarize/extconf.rb +6 -2
- data/ext/summarize/grader-tf.c +4 -1
- data/ext/summarize/highlighter.c +1 -1
- data/ext/summarize/libots.h +1 -1
- data/ext/summarize/parser.c +2 -3
- data/ext/summarize/relations.c +2 -2
- data/ext/summarize/stemmer.c +3 -3
- data/ext/summarize/summarize.c +3 -4
- data/ext/summarize/text.c +2 -2
- data/lib/summarize.rb +7 -1
- metadata +2 -2
data/README.markdown
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
cd summarize
|
13
13
|
rake build
|
14
14
|
gem build summarize.gemspec
|
15
|
-
gem install summarize-1.0.gem
|
15
|
+
gem install summarize-1.0.1.gem
|
16
16
|
|
17
17
|
## Usage
|
18
18
|
|
@@ -27,7 +27,7 @@ Or use the String method
|
|
27
27
|
By default it uses an English dictionary for summarizing but forty languages are supported. Pass in the valid ISO 639 language code to use one. A ratio (default is 25%) can also be passed in.
|
28
28
|
|
29
29
|
# Parse an article using Portuguese stemming rules with a ratio of 50%
|
30
|
-
"
|
30
|
+
"texto para sumariar".summarize(:language => 'pt', :ratio => 50)
|
31
31
|
|
32
32
|
You can also use custom stemming rules
|
33
33
|
|
data/ext/summarize/extconf.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
%w(glib-2.0 libxml-2.0).each do |lib|
|
4
|
+
pkg_config lib
|
5
|
+
end
|
6
|
+
|
7
|
+
# $CFLAGS = ENV["CFLAGS"].to_s + " " + `pkg-config --cflags glib-2.0 libxml-2.0`.chomp
|
8
|
+
# $LDFLAGS = ENV["LDFLAGS"].to_s + " " + `pkg-config --libs glib-2.0 libxml-2.0`.chomp
|
5
9
|
|
6
10
|
create_makefile('summarize/summarize')
|
data/ext/summarize/grader-tf.c
CHANGED
@@ -102,11 +102,14 @@ N = (total-number-of-sentences)
|
|
102
102
|
f = n/N
|
103
103
|
*/
|
104
104
|
|
105
|
+
/*
|
106
|
+
ssoper: unused and causing warning messages
|
107
|
+
|
105
108
|
double
|
106
109
|
ots_calc_idf (const int term_count,const int doc_word_count)
|
107
110
|
{
|
108
111
|
return -log(doc_word_count/term_count);
|
109
|
-
}
|
112
|
+
}*/
|
110
113
|
|
111
114
|
double
|
112
115
|
ots_calc_tf (const int term_count,const int doc_word_count)
|
data/ext/summarize/highlighter.c
CHANGED
@@ -31,7 +31,7 @@ static int
|
|
31
31
|
ots_highlight_max_line (OtsArticle * Doc)
|
32
32
|
{
|
33
33
|
GList *li;
|
34
|
-
int max = 0;
|
34
|
+
long int max = 0;
|
35
35
|
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
36
36
|
{
|
37
37
|
if (0 == (((OtsSentence *) li->data)->selected)) /* if not selected , count me in */
|
data/ext/summarize/libots.h
CHANGED
@@ -149,7 +149,7 @@ GList* ots_text_stem_list(const unsigned char *text,const unsigned char *lang_co
|
|
149
149
|
|
150
150
|
|
151
151
|
/*Gives a score on the relations between two lists of topics; simmilar to the inner product*/
|
152
|
-
int ots_topic_list_score(
|
152
|
+
int ots_topic_list_score(GList *topic_list1, GList *topic_list2);
|
153
153
|
|
154
154
|
G_END_DECLS
|
155
155
|
|
data/ext/summarize/parser.c
CHANGED
@@ -29,8 +29,7 @@
|
|
29
29
|
int
|
30
30
|
ots_match_post (const char *aWord,const char *post)
|
31
31
|
{
|
32
|
-
int i, wlen, plen;
|
33
|
-
|
32
|
+
long int i, wlen, plen;
|
34
33
|
|
35
34
|
wlen = strlen (aWord);
|
36
35
|
plen = strlen (post);
|
@@ -118,7 +117,7 @@ ots_parse_stream(const unsigned char *utf8, size_t len, OtsArticle * Doc) /*pars
|
|
118
117
|
OtsSentence *tmpLine = ots_append_line (Doc);
|
119
118
|
OtsStemRule * rule=Doc->stem;
|
120
119
|
gunichar uc;
|
121
|
-
|
120
|
+
size_t index = 0;
|
122
121
|
char *s = (char *) utf8;
|
123
122
|
GString *word_buffer = g_string_new (NULL);
|
124
123
|
|
data/ext/summarize/relations.c
CHANGED
@@ -132,8 +132,8 @@ return topics;
|
|
132
132
|
|
133
133
|
/*Gives a score on the relations between two lists of topics; simmilar to the inner product*/
|
134
134
|
int ots_topic_list_score(
|
135
|
-
|
136
|
-
|
135
|
+
GList *topic_list1,
|
136
|
+
GList *topic_list2)
|
137
137
|
{
|
138
138
|
int count=0;
|
139
139
|
GList *tmplist1;
|
data/ext/summarize/stemmer.c
CHANGED
@@ -70,7 +70,7 @@ if (rule != NULL)
|
|
70
70
|
static void
|
71
71
|
ots_stem_break (unsigned const char *comp,unsigned char *part_a,unsigned char *part_b) /*given already alocated part_a and b */
|
72
72
|
{ /*example "red|blue" */
|
73
|
-
int i, j, clen;
|
73
|
+
long int i, j, clen;
|
74
74
|
i = 0;
|
75
75
|
j = 0;
|
76
76
|
|
@@ -106,7 +106,7 @@ ots_stem_break (unsigned const char *comp,unsigned char *part_a,unsigned char *p
|
|
106
106
|
static unsigned char *
|
107
107
|
ots_stem_remove_pre (unsigned const char *aWord,unsigned const char *pre,unsigned const char *new)
|
108
108
|
{
|
109
|
-
int i, plen, wlen, nlen;
|
109
|
+
long int i, plen, wlen, nlen;
|
110
110
|
unsigned char *new_str = NULL;
|
111
111
|
|
112
112
|
if (aWord==NULL) return NULL;
|
@@ -135,7 +135,7 @@ ots_stem_remove_pre (unsigned const char *aWord,unsigned const char *pre,unsigne
|
|
135
135
|
static unsigned char *
|
136
136
|
ots_stem_remove_post (unsigned const char *aWord,unsigned const char *post,unsigned const char *new)
|
137
137
|
{
|
138
|
-
unsigned int i, wlen, plen, nlen;
|
138
|
+
unsigned long int i, wlen, plen, nlen;
|
139
139
|
unsigned char *new_str = NULL;
|
140
140
|
|
141
141
|
if ((NULL==aWord)||(NULL==post)||(NULL==new)) return NULL;
|
data/ext/summarize/summarize.c
CHANGED
@@ -9,15 +9,14 @@
|
|
9
9
|
#include "libots.h"
|
10
10
|
#include "summarize.h"
|
11
11
|
|
12
|
-
const char *OTS_ERROR_BAD_DICT = "Cannot load dictionary file";
|
13
12
|
|
14
13
|
void Init_summarize() {
|
15
14
|
VALUE rb_mOts = rb_define_module("Summarize");
|
16
15
|
rb_define_module_function(rb_mOts, "summarize", summarize, 3);
|
17
16
|
}
|
18
17
|
|
19
|
-
static VALUE summarize(const VALUE self, const VALUE rb_str, const VALUE rb_dict_file, const VALUE rb_ratio) {
|
20
|
-
int length = RSTRING_LEN(rb_str);
|
18
|
+
static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio) {
|
19
|
+
long int length = RSTRING_LEN(rb_str);
|
21
20
|
char *text = StringValuePtr(rb_str);
|
22
21
|
char *dictionary_file = StringValuePtr(rb_dict_file);
|
23
22
|
int ratio = NUM2INT(rb_ratio);
|
@@ -27,7 +26,7 @@ static VALUE summarize(const VALUE self, const VALUE rb_str, const VALUE rb_dict
|
|
27
26
|
|
28
27
|
if (!ots_load_xml_dictionary(doc, dictionary_file)) {
|
29
28
|
ots_free_article(doc);
|
30
|
-
rb_raise(rb_eRuntimeError, OTS_ERROR_BAD_DICT);
|
29
|
+
rb_raise(rb_eRuntimeError, "Cannot load dictionary file");
|
31
30
|
return Qnil;
|
32
31
|
}
|
33
32
|
|
data/ext/summarize/text.c
CHANGED
@@ -57,7 +57,7 @@ static void
|
|
57
57
|
ots_print_line (FILE * stream, const OtsSentence * aLine)
|
58
58
|
{
|
59
59
|
unsigned char *utf8_txt;
|
60
|
-
size_t len;
|
60
|
+
size_t len = 0;
|
61
61
|
utf8_txt = ots_get_line_text (aLine, TRUE, &len);
|
62
62
|
fwrite (utf8_txt, 1, len, stream);
|
63
63
|
g_free (utf8_txt);
|
@@ -69,7 +69,7 @@ ots_get_doc_text (const OtsArticle * Doc, size_t * out_len)
|
|
69
69
|
GList *li;
|
70
70
|
GString *text;
|
71
71
|
unsigned char *utf8_data;
|
72
|
-
size_t line_len;
|
72
|
+
size_t line_len = 0;
|
73
73
|
|
74
74
|
text = g_string_new (NULL);
|
75
75
|
|
data/lib/summarize.rb
CHANGED
@@ -10,7 +10,7 @@ class Hash #:nodoc:
|
|
10
10
|
end unless {}.respond_to? 'symbolize_keys'
|
11
11
|
|
12
12
|
module Summarize
|
13
|
-
VERSION = "1.0.1"
|
13
|
+
VERSION = "1.0.2"
|
14
14
|
|
15
15
|
LANGUAGES = [
|
16
16
|
'bg', # Bulgarian
|
@@ -84,6 +84,9 @@ class String
|
|
84
84
|
# language::
|
85
85
|
# An ISO 639-1 language code. See Summarize::LANGUAGES for the supported list.
|
86
86
|
#
|
87
|
+
# dictionary::
|
88
|
+
# A path to a custom stemming XML file
|
89
|
+
#
|
87
90
|
# == Returns:
|
88
91
|
# A string summary
|
89
92
|
#
|
@@ -105,6 +108,9 @@ class File
|
|
105
108
|
# language::
|
106
109
|
# An ISO 639-1 language code. See Summarize::LANGUAGES for the supported list.
|
107
110
|
#
|
111
|
+
# dictionary::
|
112
|
+
# A path to a custom stemming XML file
|
113
|
+
#
|
108
114
|
# == Returns:
|
109
115
|
# A string summary
|
110
116
|
#
|