summarize 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -12,7 +12,7 @@
12
12
  cd summarize
13
13
  rake build
14
14
  gem build summarize.gemspec
15
- gem install summarize-1.0.gem
15
+ gem install summarize-1.0.1.gem
16
16
 
17
17
  ## Usage
18
18
 
@@ -27,7 +27,7 @@ Or use the String method
27
27
  By default it uses an English dictionary for summarizing, but forty languages are supported. Pass in a valid ISO 639 language code to use one. A ratio (default is 25%) can also be passed in.
28
28
 
29
29
  # Parse an article using Portuguese stemming rules with a ratio of 50%
30
- "text to summarize".summarize(:language => 'pt', :ratio => 50)
30
+ "texto para sumariar".summarize(:language => 'pt', :ratio => 50)
31
31
 
32
32
  You can also use custom stemming rules
33
33
 
@@ -1,6 +1,10 @@
1
1
  require 'mkmf'
2
2
 
3
- $CFLAGS = ENV["CFLAGS"].to_s + " " + `pkg-config --cflags glib-2.0 libxml-2.0`.chomp
4
- $LDFLAGS = ENV["LDFLAGS"].to_s + " " + `pkg-config --libs glib-2.0 libxml-2.0`.chomp
3
+ %w(glib-2.0 libxml-2.0).each do |lib|
4
+ pkg_config lib
5
+ end
6
+
7
+ # $CFLAGS = ENV["CFLAGS"].to_s + " " + `pkg-config --cflags glib-2.0 libxml-2.0`.chomp
8
+ # $LDFLAGS = ENV["LDFLAGS"].to_s + " " + `pkg-config --libs glib-2.0 libxml-2.0`.chomp
5
9
 
6
10
  create_makefile('summarize/summarize')
@@ -102,11 +102,14 @@ N = (total-number-of-sentences)
102
102
  f = n/N
103
103
  */
104
104
 
105
+ /*
106
+ ssoper: unused and causing warning messages
107
+
105
108
  double
106
109
  ots_calc_idf (const int term_count,const int doc_word_count)
107
110
  {
108
111
  return -log(doc_word_count/term_count);
109
- }
112
+ }*/
110
113
 
111
114
  double
112
115
  ots_calc_tf (const int term_count,const int doc_word_count)
@@ -31,7 +31,7 @@ static int
31
31
  ots_highlight_max_line (OtsArticle * Doc)
32
32
  {
33
33
  GList *li;
34
- int max = 0;
34
+ long int max = 0;
35
35
  for (li = (GList *) Doc->lines; li != NULL; li = li->next)
36
36
  {
37
37
  if (0 == (((OtsSentence *) li->data)->selected)) /* if not selected , count me in */
@@ -149,7 +149,7 @@ GList* ots_text_stem_list(const unsigned char *text,const unsigned char *lang_co
149
149
 
150
150
 
151
151
  /* Gives a score on the relations between two lists of topics; similar to the inner product */
152
- int ots_topic_list_score(const GList *topic_list1,const GList *topic_list2);
152
+ int ots_topic_list_score(GList *topic_list1, GList *topic_list2);
153
153
 
154
154
  G_END_DECLS
155
155
 
@@ -29,8 +29,7 @@
29
29
  int
30
30
  ots_match_post (const char *aWord,const char *post)
31
31
  {
32
- int i, wlen, plen;
33
-
32
+ long int i, wlen, plen;
34
33
 
35
34
  wlen = strlen (aWord);
36
35
  plen = strlen (post);
@@ -118,7 +117,7 @@ ots_parse_stream(const unsigned char *utf8, size_t len, OtsArticle * Doc) /*pars
118
117
  OtsSentence *tmpLine = ots_append_line (Doc);
119
118
  OtsStemRule * rule=Doc->stem;
120
119
  gunichar uc;
121
- int index = 0;
120
+ size_t index = 0;
122
121
  char *s = (char *) utf8;
123
122
  GString *word_buffer = g_string_new (NULL);
124
123
 
@@ -132,8 +132,8 @@ return topics;
132
132
 
133
133
  /* Gives a score on the relations between two lists of topics; similar to the inner product */
134
134
  int ots_topic_list_score(
135
- const GList *topic_list1,
136
- const GList *topic_list2)
135
+ GList *topic_list1,
136
+ GList *topic_list2)
137
137
  {
138
138
  int count=0;
139
139
  GList *tmplist1;
@@ -70,7 +70,7 @@ if (rule != NULL)
70
70
  static void
71
71
  ots_stem_break (unsigned const char *comp,unsigned char *part_a,unsigned char *part_b) /*given already alocated part_a and b */
72
72
  { /*example "red|blue" */
73
- int i, j, clen;
73
+ long int i, j, clen;
74
74
  i = 0;
75
75
  j = 0;
76
76
 
@@ -106,7 +106,7 @@ ots_stem_break (unsigned const char *comp,unsigned char *part_a,unsigned char *p
106
106
  static unsigned char *
107
107
  ots_stem_remove_pre (unsigned const char *aWord,unsigned const char *pre,unsigned const char *new)
108
108
  {
109
- int i, plen, wlen, nlen;
109
+ long int i, plen, wlen, nlen;
110
110
  unsigned char *new_str = NULL;
111
111
 
112
112
  if (aWord==NULL) return NULL;
@@ -135,7 +135,7 @@ ots_stem_remove_pre (unsigned const char *aWord,unsigned const char *pre,unsigne
135
135
  static unsigned char *
136
136
  ots_stem_remove_post (unsigned const char *aWord,unsigned const char *post,unsigned const char *new)
137
137
  {
138
- unsigned int i, wlen, plen, nlen;
138
+ unsigned long int i, wlen, plen, nlen;
139
139
  unsigned char *new_str = NULL;
140
140
 
141
141
  if ((NULL==aWord)||(NULL==post)||(NULL==new)) return NULL;
@@ -9,15 +9,14 @@
9
9
  #include "libots.h"
10
10
  #include "summarize.h"
11
11
 
12
- const char *OTS_ERROR_BAD_DICT = "Cannot load dictionary file";
13
12
 
14
13
  void Init_summarize() {
15
14
  VALUE rb_mOts = rb_define_module("Summarize");
16
15
  rb_define_module_function(rb_mOts, "summarize", summarize, 3);
17
16
  }
18
17
 
19
- static VALUE summarize(const VALUE self, const VALUE rb_str, const VALUE rb_dict_file, const VALUE rb_ratio) {
20
- int length = RSTRING_LEN(rb_str);
18
+ static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio) {
19
+ long int length = RSTRING_LEN(rb_str);
21
20
  char *text = StringValuePtr(rb_str);
22
21
  char *dictionary_file = StringValuePtr(rb_dict_file);
23
22
  int ratio = NUM2INT(rb_ratio);
@@ -27,7 +26,7 @@ static VALUE summarize(const VALUE self, const VALUE rb_str, const VALUE rb_dict
27
26
 
28
27
  if (!ots_load_xml_dictionary(doc, dictionary_file)) {
29
28
  ots_free_article(doc);
30
- rb_raise(rb_eRuntimeError, OTS_ERROR_BAD_DICT);
29
+ rb_raise(rb_eRuntimeError, "Cannot load dictionary file");
31
30
  return Qnil;
32
31
  }
33
32
 
data/ext/summarize/text.c CHANGED
@@ -57,7 +57,7 @@ static void
57
57
  ots_print_line (FILE * stream, const OtsSentence * aLine)
58
58
  {
59
59
  unsigned char *utf8_txt;
60
- size_t len;
60
+ size_t len = 0;
61
61
  utf8_txt = ots_get_line_text (aLine, TRUE, &len);
62
62
  fwrite (utf8_txt, 1, len, stream);
63
63
  g_free (utf8_txt);
@@ -69,7 +69,7 @@ ots_get_doc_text (const OtsArticle * Doc, size_t * out_len)
69
69
  GList *li;
70
70
  GString *text;
71
71
  unsigned char *utf8_data;
72
- size_t line_len;
72
+ size_t line_len = 0;
73
73
 
74
74
  text = g_string_new (NULL);
75
75
 
data/lib/summarize.rb CHANGED
@@ -10,7 +10,7 @@ class Hash #:nodoc:
10
10
  end unless {}.respond_to? 'symbolize_keys'
11
11
 
12
12
  module Summarize
13
- VERSION = "1.0.1"
13
+ VERSION = "1.0.2"
14
14
 
15
15
  LANGUAGES = [
16
16
  'bg', # Bulgarian
@@ -84,6 +84,9 @@ class String
84
84
  # language::
85
85
  # An ISO 639-1 language code. See Summarize::LANGUAGES for the supported list.
86
86
  #
87
+ # dictionary::
88
+ # A path to a custom stemming XML file
89
+ #
87
90
  # == Returns:
88
91
  # A string summary
89
92
  #
@@ -105,6 +108,9 @@ class File
105
108
  # language::
106
109
  # An ISO 639-1 language code. See Summarize::LANGUAGES for the supported list.
107
110
  #
111
+ # dictionary::
112
+ # A path to a custom stemming XML file
113
+ #
108
114
  # == Returns:
109
115
  # A string summary
110
116
  #
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 0
8
- - 1
9
- version: 1.0.1
8
+ - 2
9
+ version: 1.0.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sean Soper