demystify 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +1 -0
- data/demystify.gemspec +34 -0
- data/lib/demystify/version.rb +3 -0
- data/lib/demystify.rb +36 -2
- data/sample2.txt +1 -0
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5971ce18068cb4b0be4e3089c81e8d1aeda25d93
|
4
|
+
data.tar.gz: bfc5e6fd91f9a7dd292384cdc2b6bb9ccf43212c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 261f97b8e0274588b73844ce2aa17d8a0d64b377cdf9420ec76d3160326909f9188104d1b27b7256334e2385a046a64f6b13c94cd6ad5a42f7fbb6632be5a301
|
7
|
+
data.tar.gz: f6c8113b838cd63cddb2eaa265a394d5c15b05fcac98e7ebcac06fadfc56d1e34df58b64a159221514e9578a3d06c4840f72ea8f0b7b67dc83907716e3063343
|
data/.gitignore
CHANGED
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
#Demystify
|
data/demystify.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'demystify/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "demystify"
|
8
|
+
spec.version = Demystify::VERSION
|
9
|
+
spec.authors = ["DouglasTGordon"]
|
10
|
+
spec.email = ["douglastgordon@gmail.com"]
|
11
|
+
|
12
|
+
spec.summary = %q{Tools for text analysis and NLP.}
|
13
|
+
spec.description = %q{Demystify allows you to extract relevant information from text for easier analysis.}
|
14
|
+
spec.homepage = "https://github.com/douglastgordon/Demystify"
|
15
|
+
|
16
|
+
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
17
|
+
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
18
|
+
# if spec.respond_to?(:metadata)
|
19
|
+
# spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
20
|
+
# else
|
21
|
+
# raise "RubyGems 2.0 or newer is required to protect against " \
|
22
|
+
# "public gem pushes."
|
23
|
+
# end
|
24
|
+
|
25
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
26
|
+
f.match(%r{^(test|spec|features)/})
|
27
|
+
end
|
28
|
+
spec.bindir = "exe"
|
29
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
30
|
+
spec.require_paths = ["lib"]
|
31
|
+
|
32
|
+
spec.add_development_dependency "bundler", "~> 1.13"
|
33
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
34
|
+
end
|
data/lib/demystify.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'byebug'
|
2
|
+
|
2
3
|
module Demystify
|
3
4
|
|
4
5
|
#From wikipedia
|
@@ -19,11 +20,15 @@ module Demystify
|
|
19
20
|
|
20
21
|
class Text
|
21
22
|
|
22
|
-
attr_accessor :content, :chars
|
23
|
+
attr_accessor :content, :chars, :words
|
23
24
|
|
24
25
|
def initialize(file)
|
25
26
|
@content = open(file).read
|
26
27
|
@chars = @content.split("")
|
28
|
+
@words = @content.split(/[^[[:word:]]]+/)
|
29
|
+
|
30
|
+
@sentences = make_sentences
|
31
|
+
|
27
32
|
end
|
28
33
|
|
29
34
|
def char_count
|
@@ -96,7 +101,36 @@ module Demystify
|
|
96
101
|
end
|
97
102
|
|
98
103
|
def word_count
|
99
|
-
@
|
104
|
+
@words.count
|
105
|
+
end
|
106
|
+
|
107
|
+
def sentence_count
|
108
|
+
@sentences.length
|
109
|
+
end
|
110
|
+
|
111
|
+
def first_words_of_sentences
|
112
|
+
first_words = []
|
113
|
+
@sentences.each do |sentence|
|
114
|
+
first_words << sentence.first
|
115
|
+
end
|
116
|
+
first_words
|
117
|
+
end
|
118
|
+
|
119
|
+
def last_words_of_sentences
|
120
|
+
last_words = []
|
121
|
+
@sentences.each do |sentence|
|
122
|
+
last_words << sentence.last
|
123
|
+
end
|
124
|
+
last_words
|
125
|
+
end
|
126
|
+
|
127
|
+
private
|
128
|
+
|
129
|
+
def make_sentences
|
130
|
+
sentence_regex = /((?<=[a-z0-9)][.?!])|(?<=[a-z0-9][.?!]"))\s+(?="?[A-Z])/
|
131
|
+
sentences = @content.split(sentence_regex)
|
132
|
+
sentences.select!{|sentence| sentence.length > 1}
|
133
|
+
sentences.map{|sentence| sentence.chomp}
|
100
134
|
end
|
101
135
|
|
102
136
|
end
|
data/sample2.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Whale is the common name for a widely distributed and diverse group of fully aquatic placental marine mammals. They are an informal grouping within the infraorder Cetacea, usually excluding dolphins and porpoises. Whales, dolphins and porpoises belong to the order Cetartiodactyla with even-toed ungulates and their closest living relatives are the hippopotamuses, having diverged about 40 million years ago. The two parvorders of whales, baleen whales (Mysticeti) and toothed whales (Odontoceti), are thought to have split apart around 34 million years ago. The whales comprise eight extant families: Balaenopteridae (the rorquals), Balaenidae (right whales), Cetotheriidae (the pygmy right whale), Eschrichtiidae (the gray whale), Monodontidae (belugas and narwhals), Physeteridae (the sperm whale), Kogiidae (the dwarf and pygmy sperm whale), and Ziphiidae (the beaked whales).
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: demystify
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- DouglasTGordon
|
@@ -48,8 +48,12 @@ extra_rdoc_files: []
|
|
48
48
|
files:
|
49
49
|
- ".gitignore"
|
50
50
|
- Gemfile
|
51
|
+
- README.md
|
52
|
+
- demystify.gemspec
|
51
53
|
- lib/demystify.rb
|
54
|
+
- lib/demystify/version.rb
|
52
55
|
- sample1.txt
|
56
|
+
- sample2.txt
|
53
57
|
homepage: https://github.com/douglastgordon/Demystify
|
54
58
|
licenses: []
|
55
59
|
metadata: {}
|