abstractifier 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bacbd5f6a3e64e0218dd8bd75b850047d7cb4e21
4
+ data.tar.gz: 5ff554bb64332d3dcd3ea8efb05d3f366cf8dc1a
5
+ SHA512:
6
+ metadata.gz: 6a8bbd4b65ffbe08dedc1ecb8c1729dda951c663b5f755622034b20a581995c542c2f384bb10ac85f003b4fcb775bd3845f670c9181c88608f6a38a2851521a7
7
+ data.tar.gz: 50698eb3aaec5ab56abbbfb70090de5b1c03c8c360603b70379e615a9cbc272a646b0544680cf2db5f135b4f2d35b108550bfc21f146a7f771fc8a650f2524db
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Altmetric LLP
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,45 @@
1
+ # Abstractifier [![Build Status](https://travis-ci.org/altmetric/abstractifier.svg?branch=master)](https://travis-ci.org/altmetric/abstractifier)
2
+
3
+ Builds a short abstract of a supplied text by extracting the first couple of
4
+ sentences and trimming off punctuation etc.
5
+
6
+ ## Usage
7
+
8
+ ### Basic usage
9
+
10
+ ```ruby
11
+ require 'abstractifier'
12
+
13
+ abstractifier = Abstractifier.new
14
+ #=> #<Abstractifier:0x007ffe350993c8 @elider="…", @max_length=250, @min_length=80>
15
+
16
+ abstractifier.abstractify("This is a longish piece of text. It contains several sentences. It is long enough that we will need to build an abstract for it, but the first two sentences are quite short, so we will need to include some content from the third sentence so that we can fulfill both our minimum and maximum abstract lengths.")
17
+ #=> "This is a longish piece of text. It contains several sentences. It is long enough that we will need to build an abstract for it, but the first two sentences are quite short, so we will need to include some content from the third sentence so that we…"
18
+ ```
19
+
20
+ It's possible to set minimum and maximum lengths on the abstract:
21
+
22
+ ```ruby
23
+ abstractifier = Abstractifier.new(max: 100, min: 20)
24
+ #=> #<Abstractifier:0x007fca72d64040 @elider="…", @max_length=100, @min_length=20>
25
+
26
+ abstractifier.abstractify("This abstract should be much shorter. It should only include the first sentence.")
27
+ #=> "This abstract should be much shorter."
28
+ ```
29
+
30
+ We can also change the string used to elide sentences (though I don't really know why you'd do this):
31
+
32
+ ```ruby
33
+ abstractifier = Abstractifier.new(max: 30, min: 20, elider: '!!!')
34
+ #=> #<Abstractifier:0x007fca72d64040 @elider="!!!", @max_length=30, @min_length=20>
35
+
36
+ abstractifier.abstractify("This abstract should be much shorter but long enough that we have to truncate the sentence in the middle")
37
+ #=> "This abstract should be much!!!"
38
+ ```
39
+
40
+
41
+ ## License
42
+
43
+ Copyright © 2015 Altmetric LLP
44
+
45
+ Distributed under the MIT License.
@@ -0,0 +1,59 @@
1
+ # encoding: UTF-8
2
+
3
# Builds a short abstract from the leading sentences of a text.
#
# Sentences (split on a full stop followed by whitespace or end of line) are
# appended until the abstract is at least +min_length+ characters long. If the
# result is then longer than +max_length+, it is cut back and finished with
# the +elider+ string.
class Abstractifier
  DEFAULT_MINIMUM_LENGTH = 80
  DEFAULT_MAXIMUM_LENGTH = 250

  attr_accessor :max_length, :min_length, :elider

  # @param options [Hash] optional settings
  # @option options [Integer] :min length at which no further sentences are
  #   appended (defaults to DEFAULT_MINIMUM_LENGTH)
  # @option options [Integer] :max length above which the abstract is
  #   truncated (defaults to DEFAULT_MAXIMUM_LENGTH)
  # @option options [String] :elider string appended to a truncated abstract
  #   (defaults to '…')
  def initialize(options = {})
    @min_length = options.fetch(:min, DEFAULT_MINIMUM_LENGTH)
    @max_length = options.fetch(:max, DEFAULT_MAXIMUM_LENGTH)
    @elider = options.fetch(:elider, '…')
  end

  # Produces the abstract for +string+.
  #
  # @param string [String, nil] source text; nil is treated as empty text
  # @return [String] the abstract. When truncation happens the elider is
  #   appended after the cut, so the result can be longer than +max_length+.
  def abstractify(string)
    # dup gives a mutable buffer even when string literals are frozen.
    output = ''.dup

    extract_sentences(string).each do |sentence|
      output << terminate(sentence) << ' '
      break if output.length >= min_length
    end

    output = forcibly_truncate(output) if output.length > max_length

    tidy(output)
  end

  private

  # Restores the full stop consumed by sentence splitting. A sentence that
  # already ends in "?" or "!" is left alone so it does not become "?." or
  # "!.". A trailing "." is deliberately not checked: the split leaves
  # "Hmm.." for "Hmm... ", and that needs its third dot back.
  def terminate(sentence)
    sentence =~ /[?!]\z/ ? sentence : "#{sentence}."
  end

  # Keeps the first max_length + 1 characters, drops a trailing run of word
  # characters when it is preceded by whitespace (the possibly partial last
  # word), then fixes up the ending punctuation.
  def forcibly_truncate(string)
    # split returns [] when the stripped slice is empty; fall back to ''
    # rather than passing nil on.
    truncated = string[0, max_length + 1].strip.split(/\s\b\w+$/).first || ''

    strip_trailing_punctuation(truncated)
  end

  # Collapses whitespace runs to single spaces and splits on a full stop that
  # is followed by whitespace or end of line. The full stops are consumed.
  def extract_sentences(string)
    string
      .to_s
      .gsub(/[[:space:]]+/, ' ')
      .split(/\.(?:\s|$)/)
  end

  # Leaves text ending in ".", "?" or "!" untouched; otherwise replaces a
  # trailing punctuation character with the elider, or appends the elider.
  def strip_trailing_punctuation(string)
    if string[-1] =~ /[\.\?\!]/
      string
    elsif string[-1] =~ /[[:punct:]]/
      string[0..-2] + elider
    else
      string + elider
    end
  end

  # Collapses whitespace, removes a space directly before "," or ".", and
  # trims both ends.
  def tidy(string)
    string
      .gsub(/[[:space:]]+/, ' ')
      .gsub(/[[:space:]](,|\.)/, '\1')
      .strip
  end
end
@@ -0,0 +1,56 @@
1
+ # encoding: UTF-8
2
+ # rubocop:disable Metrics/LineLength
3
+
4
+ require 'abstractifier'
5
+
6
# Examples for Abstractifier#abstractify using a 20-character minimum and a
# 50-character maximum.
RSpec.describe Abstractifier do
  let(:instance) { described_class.new(max: 50, min: 20) }

  it 'elides to sentence boundaries' do
    expect(
      instance.abstractify(
        'This is the first sentence. This is the second sentence.'
      )
    ).to eq('This is the first sentence.')
  end

  it 'elides mid-sentence when no suitable sentence boundary.' do
    expect(
      instance.abstractify(
        'This is short. This is much longer second sentence, which much be elided mid-sentence.'
      )
    ).to eq('This is short. This is much longer second sentence…')
  end

  it 'elides mid-word when too long' do
    expect(
      instance.abstractify(
        'Supercalifragilisticexpialidociousisareallylongwordthatmustbetruncated'
      )
    ).to eq('Supercalifragilisticexpialidociousisareallylongword…')
  end

  # This example previously appeared twice verbatim; one copy is enough.
  it 'retains trailing punctuation from sentence boundaries' do
    expect(
      instance.abstractify(
        'I really think this is a good idea, personally! But whatever you think, I guess.'
      )
    ).to eq('I really think this is a good idea, personally!')
  end

  it 'removes trailing punctuation from word boundaries' do
    expect(
      instance.abstractify(
        'This sentence should be truncated in a few words, but should have no trailing punctuation'
      )
    ).to eq('This sentence should be truncated in a few words…')
  end
end
@@ -0,0 +1,14 @@
1
# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # Filter on the :focus tag, but run everything when the filter excludes all
  # examples.
  rspec.filter_run(:focus)
  rspec.run_all_when_everything_filtered = true

  rspec.disable_monkey_patching!
  rspec.warnings = true

  # Random ordering; Kernel's RNG is seeded from the same seed RSpec reports.
  rspec.order = :random
  Kernel.srand(rspec.seed)

  # Use the documentation formatter when a single spec file is run.
  if rspec.files_to_run.one?
    rspec.default_formatter = 'doc'
  end

  rspec.expect_with(:rspec) do |expectation_config|
    expectation_config.include_chain_clauses_in_custom_matcher_descriptions = true
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: abstractifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew MacLeod
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ description: |2
28
+ Simple abstract generation library for Ruby.
29
+ email: support@altmetric.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - LICENSE
35
+ - README.md
36
+ - lib/abstractifier.rb
37
+ - spec/abstractifier_spec.rb
38
+ - spec/spec_helper.rb
39
+ homepage: https://github.com/altmetric/abstractifier
40
+ licenses:
41
+ - MIT
42
+ metadata: {}
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 2.4.5
60
+ signing_key:
61
+ specification_version: 4
62
+ summary: A small library to generate abstracts from text.
63
+ test_files:
64
+ - spec/abstractifier_spec.rb
65
+ - spec/spec_helper.rb