abstractifier 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +45 -0
- data/lib/abstractifier.rb +59 -0
- data/spec/abstractifier_spec.rb +56 -0
- data/spec/spec_helper.rb +14 -0
- metadata +65 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bacbd5f6a3e64e0218dd8bd75b850047d7cb4e21
|
4
|
+
data.tar.gz: 5ff554bb64332d3dcd3ea8efb05d3f366cf8dc1a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6a8bbd4b65ffbe08dedc1ecb8c1729dda951c663b5f755622034b20a581995c542c2f384bb10ac85f003b4fcb775bd3845f670c9181c88608f6a38a2851521a7
|
7
|
+
data.tar.gz: 50698eb3aaec5ab56abbbfb70090de5b1c03c8c360603b70379e615a9cbc272a646b0544680cf2db5f135b4f2d35b108550bfc21f146a7f771fc8a650f2524db
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Altmetric LLP
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Abstractifier [![Build Status](https://travis-ci.org/altmetric/abstractifier.svg?branch=master)](https://travis-ci.org/altmetric/abstractifier)
|
2
|
+
|
3
|
+
Builds a short abstract of a supplied text by extracting the first couple of
|
4
|
+
sentences and trimming off punctuation etc.
|
5
|
+
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
### Basic usage
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
require 'abstractifier'
|
12
|
+
|
13
|
+
abstractifier = Abstractifier.new
|
14
|
+
#=> #<Abstractifier:0x007ffe350993c8 @elider="…", @max_length=250, @min_length=80>
|
15
|
+
|
16
|
+
abstractifier.abstractify("This is a longish piece of text. It contains several sentences. It is long enough that we will need to build an abstract for it, but the first two sentences are quite short, so we will need to include some content from the third sentence so that we can fulfill both our minimum and maximum abstract lengths.")
|
17
|
+
#=> "This is a longish piece of text. It contains several sentences. It is long enough that we will need to build an abstract for it, but the first two sentences are quite short, so we will need to include some content from the third sentence so that we…"
|
18
|
+
```
|
19
|
+
|
20
|
+
It's possible to set minimum and maximum lengths on the abstract:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
abstractifier = Abstractifier.new(max: 100, min: 20)
|
24
|
+
#=> #<Abstractifier:0x007fca72d64040 @elider="…", @max_length=100, @min_length=20>
|
25
|
+
|
26
|
+
abstractifier.abstractify("This abstract should be much shorter. It should only include the first sentence.")
|
27
|
+
#=> "This abstract should be much shorter."
|
28
|
+
```
|
29
|
+
|
30
|
+
We can also change the string used to elide sentences (though I don't really know why you'd do this):
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
abstractifier = Abstractifier.new(max: 30, min: 20, elider: '!!!')
|
34
|
+
#=> #<Abstractifier:0x007fca72d64040 @elider="!!!", @max_length=30, @min_length=20>
|
35
|
+
|
36
|
+
abstractifier.abstractify("This abstract should be much shorter but long enough that we have to truncate the sentence in the middle")
|
37
|
+
#=> "This abstract should be much!!!"
|
38
|
+
```
|
39
|
+
|
40
|
+
|
41
|
+
## License
|
42
|
+
|
43
|
+
Copyright © 2015 Altmetric LLP
|
44
|
+
|
45
|
+
Distributed under the MIT License.
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Builds a short abstract of a text by taking leading sentences until a
# minimum length is reached, then truncating at a word boundary (and adding
# an elider) if the result is longer than the maximum length.
class Abstractifier
  DEFAULT_MINIMUM_LENGTH = 80
  DEFAULT_MAXIMUM_LENGTH = 250

  attr_accessor :max_length, :min_length, :elider

  # @param options [Hash] optional settings
  # @option options [Integer] :min length at which no further sentences are
  #   added (defaults to DEFAULT_MINIMUM_LENGTH)
  # @option options [Integer] :max length above which the abstract is
  #   forcibly truncated (defaults to DEFAULT_MAXIMUM_LENGTH)
  # @option options [String] :elider text appended to a truncated abstract
  #   (defaults to '…')
  def initialize(options = {})
    @min_length = options.fetch(:min, DEFAULT_MINIMUM_LENGTH)
    @max_length = options.fetch(:max, DEFAULT_MAXIMUM_LENGTH)
    @elider = options.fetch(:elider, '…')
  end

  # Produces the abstract for +string+.
  #
  # Sentences are appended until the abstract is at least +min_length+ long.
  # If it is then longer than +max_length+ it is cut back to a word boundary
  # and the elider is appended. The forced truncation keeps up to
  # max_length + 1 characters before the elider is added, so the result can
  # be slightly longer than +max_length+.
  #
  # @param string [String, nil] the source text; nil and blank text yield ''
  # @return [String] the abstract
  def abstractify(string)
    # `dup` gives a mutable buffer even when string literals are frozen.
    output = ''.dup

    extract_sentences(string).each do |sentence|
      output << terminate(sentence) << ' '
      break if output.length >= min_length
    end

    output = forcibly_truncate(output) if output.length > max_length

    tidy(output)
  end

  private

  # Restores the full stop that sentence splitting removed, unless the
  # sentence already ends in '!' or '?' (which would give "Wow!.").
  # A sentence ending in '.' still gets one so that "..." is preserved.
  def terminate(sentence)
    sentence.end_with?('?', '!') ? sentence : "#{sentence}."
  end

  # Cuts +string+ to max_length + 1 characters, drops a trailing partial
  # word when one follows whitespace, and adds the elider where needed.
  def forcibly_truncate(string)
    # `sub` (rather than `split(...).first`) never yields nil for an empty chunk.
    truncated = string[0, max_length + 1].strip.sub(/\s\b\w+\z/, '')

    strip_trailing_punctuation(truncated)
  end

  # Collapses whitespace runs to single spaces, trims the ends and splits on
  # a full stop followed by whitespace or the end of the text. The splitting
  # full stop is not part of the returned sentences.
  def extract_sentences(string)
    string
      .to_s
      .gsub(/[[:space:]]+/, ' ')
      .strip
      .split(/\.(?:\s|\z)/)
  end

  # Keeps a final '.', '?' or '!' untouched; replaces any other trailing
  # punctuation character with the elider; otherwise appends the elider.
  def strip_trailing_punctuation(string)
    case string[-1]
    when /[.?!]/ then string
    when /[[:punct:]]/ then string[0..-2] + elider
    else string + elider
    end
  end

  # Collapses whitespace, removes whitespace before ',' and '.', and trims.
  def tidy(string)
    string
      .gsub(/[[:space:]]+/, ' ')
      .gsub(/[[:space:]](,|\.)/, '\1')
      .strip
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# rubocop:disable Metrics/LineLength
|
3
|
+
|
4
|
+
require 'abstractifier'
|
5
|
+
|
6
|
+
# Behavioural examples for Abstractifier#abstractify using a small
# min/max window so that short fixture sentences exercise truncation.
RSpec.describe Abstractifier do
  let(:instance) { described_class.new(max: 50, min: 20) }

  it 'elides to sentence boundaries' do
    expect(
      instance.abstractify(
        'This is the first sentence. This is the second sentence.'
      )
    ).to eq('This is the first sentence.')
  end

  it 'elides mid-sentence when no suitable sentence boundary.' do
    expect(
      instance.abstractify(
        'This is short. This is much longer second sentence, which much be elided mid-sentence.'
      )
    ).to eq('This is short. This is much longer second sentence…')
  end

  it 'elides mid-word when too long' do
    expect(
      instance.abstractify(
        'Supercalifragilisticexpialidociousisareallylongwordthatmustbetruncated'
      )
    ).to eq('Supercalifragilisticexpialidociousisareallylongword…')
  end

  # This example previously appeared twice verbatim; one copy is kept.
  it 'retains trailing punctuation from sentence boundaries' do
    expect(
      instance.abstractify(
        'I really think this is a good idea, personally! But whatever you think, I guess.'
      )
    ).to eq('I really think this is a good idea, personally!')
  end

  it 'removes trailing punctuation from word boundaries' do
    expect(
      instance.abstractify(
        'This sentence should be truncated in a few words, but should have no trailing punctuation'
      )
    ).to eq('This sentence should be truncated in a few words…')
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Suite-wide RSpec settings.
RSpec.configure do |config|
  # Focus filtering, with a fallback setting for when the filter leaves nothing.
  config.filter_run(:focus)
  config.run_all_when_everything_filtered = true

  config.disable_monkey_patching!
  config.warnings = true

  # Random ordering; Kernel's RNG is seeded with the same seed RSpec uses.
  config.order = :random
  Kernel.srand(config.seed)

  # Use the 'doc' formatter by default when exactly one file is being run.
  if config.files_to_run.one?
    config.default_formatter = 'doc'
  end

  config.expect_with(:rspec) do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: abstractifier
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew MacLeod
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.2'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.2'
|
27
|
+
description: |2
|
28
|
+
Simple abstract generation library for Ruby.
|
29
|
+
email: support@altmetric.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- LICENSE
|
35
|
+
- README.md
|
36
|
+
- lib/abstractifier.rb
|
37
|
+
- spec/abstractifier_spec.rb
|
38
|
+
- spec/spec_helper.rb
|
39
|
+
homepage: https://github.com/altmetric/abstractifier
|
40
|
+
licenses:
|
41
|
+
- MIT
|
42
|
+
metadata: {}
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 2.4.5
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: A small library to generate abstracts from text.
|
63
|
+
test_files:
|
64
|
+
- spec/abstractifier_spec.rb
|
65
|
+
- spec/spec_helper.rb
|