summarize 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,4 +1,5 @@
1
1
  Makefile
2
+ mkmf.log
2
3
  lib/**/*.bundle
3
4
  tmp/*
4
5
  doc/*
@@ -33,6 +33,13 @@ You can also use custom stemming rules
33
33
 
34
34
  "text to summarize".summarize(:dictionary => 'path/to/custom/dictionary')
35
35
 
36
+ Topics can also be returned
37
+
38
+ # Returns an array with content first and topics second
39
+ content, topics = "text to summarize".summarize(:topics => true)
40
+ # content => summarized text
41
+ # topics => Comma-delimited string of topics
42
+
36
43
  ## Dependencies
37
44
 
38
45
  You must have glib-2.0 and libxml-2.0 installed and properly configured.
@@ -42,3 +49,21 @@ You must have glib-2.0 and libxml-2.0 installed and properly configured.
42
49
  Gem written by Sean Soper ([@ssoper](http://twitter.com/ssoper))
43
50
 
44
51
  The Open Text Summarizer library was written by Nadav Rotem and can be found at <http://libots.sourceforge.net/>
52
+
53
+ ## License
54
+
55
+ Copyright (C) 2010 Sean Soper <sean.soper@gmail.com>
56
+
57
+ This program is free software; you can redistribute it and/or modify
58
+ it under the terms of the GNU General Public License as published by
59
+ the Free Software Foundation; either version 2 of the License, or
60
+ (at your option) any later version.
61
+
62
+ This program is distributed in the hope that it will be useful,
63
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
64
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
65
+ GNU General Public License for more details.
66
+
67
+ You should have received a copy of the GNU General Public License
68
+ along with this program; if not, write to the Free Software
69
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
data/Rakefile CHANGED
@@ -44,6 +44,11 @@ task :test => :build do
44
44
  content.summarize(:dictionary => '/path/to/nowhere')
45
45
  end
46
46
  end
47
+
48
+ def test_topics
49
+ content, topics = @jupiter.split('||').first.summarize(:topics => true)
50
+ assert_equal topics, "jupiter,planet,moon,system,mass"
51
+ end
47
52
  end
48
53
 
49
54
  end
@@ -4,7 +4,4 @@ require 'mkmf'
4
4
  pkg_config lib
5
5
  end
6
6
 
7
- # $CFLAGS = ENV["CFLAGS"].to_s + " " + `pkg-config --cflags glib-2.0 libxml-2.0`.chomp
8
- # $LDFLAGS = ENV["LDFLAGS"].to_s + " " + `pkg-config --libs glib-2.0 libxml-2.0`.chomp
9
-
10
7
  create_makefile('summarize/summarize')
@@ -12,18 +12,22 @@
12
12
 
13
13
  void Init_summarize() {
14
14
  VALUE rb_mOts = rb_define_module("Summarize");
15
- rb_define_module_function(rb_mOts, "summarize", summarize, 3);
15
+ rb_define_module_function(rb_mOts, "summarize", summarize, 4);
16
16
  }
17
17
 
18
- static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio) {
18
+ static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE rb_dict_file, const VALUE rb_ratio, const VALUE rb_topics) {
19
19
  long int length = RSTRING_LEN(rb_str);
20
20
  char *text = StringValuePtr(rb_str);
21
21
  char *dictionary_file = StringValuePtr(rb_dict_file);
22
22
  int ratio = NUM2INT(rb_ratio);
23
- unsigned char *result;
23
+
24
24
  size_t result_len;
25
25
  OtsArticle *doc = ots_new_article();
26
26
 
27
+ VALUE summary;
28
+ VALUE topics;
29
+ VALUE result;
30
+
27
31
  if (!ots_load_xml_dictionary(doc, dictionary_file)) {
28
32
  ots_free_article(doc);
29
33
  rb_raise(rb_eRuntimeError, "Cannot load dictionary file");
@@ -34,9 +38,17 @@ static VALUE summarize(const VALUE self, volatile VALUE rb_str, volatile VALUE r
34
38
  ots_grade_doc(doc);
35
39
  ots_highlight_doc(doc, ratio);
36
40
 
37
- result = ots_get_doc_text(doc, &result_len);
41
+ summary = rb_str_new2(ots_get_doc_text(doc, &result_len));
42
+ topics = rb_str_new2((const char *)doc->title);
38
43
 
39
44
  ots_free_article(doc);
40
45
 
41
- return rb_str_new2(result);
46
+ if (rb_topics == Qtrue) {
47
+ result = rb_ary_new();
48
+ rb_ary_push(result, summary);
49
+ rb_ary_push(result, topics);
50
+ return result;
51
+ } else {
52
+ return summary;
53
+ }
42
54
  }
@@ -8,5 +8,5 @@
8
8
 
9
9
  #ifndef __summarize_h__
10
10
  #define __summarize_h__
11
- static VALUE summarize(VALUE, VALUE, VALUE, VALUE);
11
+ static VALUE summarize(VALUE, VALUE, VALUE, VALUE, VALUE);
12
12
  #endif
@@ -10,7 +10,7 @@ class Hash #:nodoc:
10
10
  end unless {}.respond_to? 'symbolize_keys'
11
11
 
12
12
  module Summarize
13
- VERSION = "1.0.2"
13
+ VERSION = "1.0.3"
14
14
 
15
15
  LANGUAGES = [
16
16
  'bg', # Bulgarian
@@ -54,8 +54,9 @@ module Summarize
54
54
 
55
55
  def self.parse_options(options = {}) #:nodoc:
56
56
  default_options = {
57
- :ratio => 25, # percentage
58
- :language => 'en' # ISO 639-1 code
57
+ :ratio => 25, # percentage
58
+ :language => 'en', # ISO 639-1 code
59
+ :topics => false
59
60
  }
60
61
 
61
62
  options = default_options.merge(options.symbolize_keys)
@@ -67,7 +68,7 @@ module Summarize
67
68
  dict_file = File.join(File.expand_path(File.dirname(__FILE__)), "../ext/summarize/dic/#{options[:language]}")
68
69
  end
69
70
 
70
- return [dict_file, options[:ratio]]
71
+ return [dict_file, options[:ratio], options[:topics]]
71
72
  end
72
73
 
73
74
  end
@@ -87,12 +88,16 @@ class String
87
88
  # dictionary::
88
89
  # A path to a custom stemming XML file
89
90
  #
91
+ # topics::
92
+ # A boolean indicating whether to return topics as well. When true, the return
93
+ # value is an array with content first and topics second. Default is false.
94
+ #
90
95
  # == Returns:
91
- # A string summary
96
+ # A string summary OR an array of content and topics
92
97
  #
93
98
  def summarize(options = {})
94
- dict_file, ratio = Summarize.parse_options(options)
95
- String.send(:summarize, self, dict_file, ratio)
99
+ dict_file, ratio, topics = Summarize.parse_options(options)
100
+ String.send(:summarize, self, dict_file, ratio, topics)
96
101
  end
97
102
 
98
103
  end
@@ -111,8 +116,12 @@ class File
111
116
  # dictionary::
112
117
  # A path to a custom stemming XML file
113
118
  #
119
+ # topics::
120
+ # A boolean indicating whether to return topics as well. When true, the return
121
+ # value is an array with content first and topics second. Default is false.
122
+ #
114
123
  # == Returns:
115
- # A string summary
124
+ # A string summary OR an array of content and topics
116
125
  #
117
126
  def summarize(options = {})
118
127
  self.read.summarize(options)
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
11
11
  s.email = %q{sean.soper@gmail.com}
12
12
 
13
13
  s.rubygems_version = %q{1.3.7}
14
- s.date = %q{2010-11-30}
14
+ s.date = %q{2010-12-06}
15
15
  s.rubyforge_project = "summarize"
16
16
 
17
17
  s.require_paths = ["lib"]
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 0
8
- - 2
9
- version: 1.0.2
8
+ - 3
9
+ version: 1.0.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Sean Soper
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-11-30 00:00:00 -05:00
17
+ date: 2010-12-06 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency