sentence_extractor 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ tester.rb
19
+
data/.project ADDED
@@ -0,0 +1,12 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <projectDescription>
3
+ <name>sentence_extractor_gem</name>
4
+ <comment></comment>
5
+ <projects>
6
+ </projects>
7
+ <buildSpec>
8
+ </buildSpec>
9
+ <natures>
10
+ <nature>com.aptana.ruby.core.rubynature</nature>
11
+ </natures>
12
+ </projectDescription>
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sentence_extractor.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Sebastian Glazebrook
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # SentenceExtractor
2
+
3
+ This gem takes a bunch of text and returns the sentences found based on language-specific rules.
4
+ E.g. start and end delimiters etc.
5
+
6
+ ## Installation
7
+
8
+ Add this line to your application's Gemfile:
9
+
10
+ gem 'sentence_extractor'
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install sentence_extractor
19
+
20
+ ## Usage
21
+
22
+ require 'sentence_extractor'
23
+
24
+ sen_ext = SentenceExtractor::Extractor.new
25
+
26
+ sentences = sen_ext.extract_sentences("Sentence 1! Sentence 2. Sentence 3?")
27
+
28
+ ## Contributing
29
+
30
+ 1. Fork it
31
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
32
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
33
+ 4. Push to the branch (`git push origin my-new-feature`)
34
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,59 @@
1
module SentenceExtractor
  # Splits a piece of text into sentences by cutting it after every
  # occurrence of one of the configured delimiters.
  #
  # Each returned sentence runs from the end of the previous sentence up to
  # and including its closing delimiter, so whitespace that separated two
  # sentences stays at the start of the later one. Text that follows the
  # last delimiter is not returned.
  #
  #   SentenceExtractor::Extractor.new.extract_sentences("Hi! Bye. rest")
  #   # => ["Hi!", " Bye."]
  class Extractor
    # language                    - language code; stored but not used by the
    #                               extraction logic in this class.
    # delimiters                  - strings that terminate a sentence. Each one
    #                               is matched literally and may be longer than
    #                               one character (e.g. "?!" or "...").
    # first_letter_capitilization - stored but not used by the extraction
    #                               logic in this class.
    def initialize(language = "en", delimiters = [".", "!", "?"], first_letter_capitilization = false)
      @language = language
      @delimiters = delimiters
      @first_letter_capitilization = first_letter_capitilization
    end

    # Returns an Array of the sentences found in +text+.
    #
    # The result is empty when +text+ is nil, when it contains no delimiter,
    # or when no usable delimiter is configured.
    def extract_sentences(text = "Insert some text here. Please!")
      return [] if text.nil?

      pattern = delimiter_pattern
      return [] unless pattern

      split_after_delimiters(text.to_s, pattern)
    end

    private

    # Builds a Regexp matching any configured delimiter literally, or returns
    # nil when there is nothing to match.
    def delimiter_pattern
      # Empty delimiters are dropped: they would match at every position
      # without ever consuming any text.
      tokens = Array(@delimiters).map(&:to_s).reject(&:empty?)
      return nil if tokens.empty?

      # Longest first, so "..." is preferred over "." when both are given.
      # Regexp.union escapes every token in full.
      Regexp.union(tokens.sort_by { |token| -token.length })
    end

    # Walks through +text+ once, emitting the slice that ends with each
    # delimiter match and resuming the search right after it.
    def split_after_delimiters(text, pattern)
      sentences = []
      cursor = 0
      while (found = pattern.match(text, cursor))
        sentence_end = found.end(0)
        sentences << text[cursor...sentence_end]
        cursor = sentence_end
      end
      sentences
    end
  end
end
@@ -0,0 +1,3 @@
1
module SentenceExtractor
  # Release number of the gem. Frozen so that code holding a reference to the
  # constant cannot modify the string in place.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,6 @@
1
+ require "sentence_extractor/version"
2
+ require "sentence_extractor/extractor"
3
+
4
# Top-level namespace of the gem. It is intentionally empty in this file.
module SentenceExtractor; end
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/sentence_extractor/version', __FILE__)
3
+
4
# Gem packaging metadata. The file lists below are taken from `git ls-files`,
# so the gem has to be built from inside a git checkout of the project.
Gem::Specification.new do |gem|
  gem.authors       = ["Seb Glazebrook"]
  gem.email         = ["me@sebglazebrook.com"]
  gem.description   = %q{This gem takes a bunch of text and returns the sentences found based on language specific rules. E.g. start and end delimiters etc.}
  gem.summary       = %q{Give it some text and it will return an array of sentences.}
  gem.homepage      = ""
  # Matches the MIT LICENSE file shipped with the project.
  gem.license       = "MIT"

  # Executables are published by base name only; the bin/ prefix is stripped.
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.name          = "sentence_extractor"
  gem.require_paths = ["lib"]
  gem.version       = SentenceExtractor::VERSION
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sentence_extractor
3
+ version: !ruby/object:Gem::Version
4
+ hash: 25
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 1
10
+ version: 0.1.1
11
+ platform: ruby
12
+ authors:
13
+ - Seb Glazebrook
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-03-21 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description: This gem takes takes a bunch of text and returns the sentences found based on language specific rules. E.g. start and end delimiters etc.
22
+ email:
23
+ - me@sebglazebrook.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - .gitignore
32
+ - .project
33
+ - Gemfile
34
+ - LICENSE
35
+ - README.md
36
+ - Rakefile
37
+ - lib/sentence_extractor.rb
38
+ - lib/sentence_extractor/extractor.rb
39
+ - lib/sentence_extractor/version.rb
40
+ - sentence_extractor.gemspec
41
+ homepage: ""
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options: []
46
+
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ hash: 3
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.8.9
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Give it some text and it will return an array of sentences.
74
+ test_files: []
75
+