demystify 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6401829101cacafbc3bbd667bbc6f7003fd9e1df
4
- data.tar.gz: 8558b8f978dcc94dd685acf04e5b62ccbfd90ebe
3
+ metadata.gz: 5971ce18068cb4b0be4e3089c81e8d1aeda25d93
4
+ data.tar.gz: bfc5e6fd91f9a7dd292384cdc2b6bb9ccf43212c
5
5
  SHA512:
6
- metadata.gz: aa98c469ade8a8ebaa75b6cbe74a6c4fbb822f1822ba8be580eff1a90f58ca20039c6cff0076a306a32eca9858a9028f22851dc94a128c27a668a0176c6d2009
7
- data.tar.gz: 0b1a2840b3e6564d777890959f016ab4cee51fdaff444e91e1ef962f296b4c1e475322c889ba6636c6e6ceba2d3802abf4238a8cc146108f0439a6f4f5e1a342
6
+ metadata.gz: 261f97b8e0274588b73844ce2aa17d8a0d64b377cdf9420ec76d3160326909f9188104d1b27b7256334e2385a046a64f6b13c94cd6ad5a42f7fbb6632be5a301
7
+ data.tar.gz: f6c8113b838cd63cddb2eaa265a394d5c15b05fcac98e7ebcac06fadfc56d1e34df58b64a159221514e9578a3d06c4840f72ea8f0b7b67dc83907716e3063343
data/.gitignore CHANGED
@@ -2,3 +2,4 @@ Gemfile.lock
2
2
  .byebug_history
3
3
  Rakefile
4
4
  bin/
5
+ .demystify-0.0.1.gem
data/README.md ADDED
@@ -0,0 +1 @@
1
+ #Demystify
data/demystify.gemspec ADDED
@@ -0,0 +1,34 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'demystify/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "demystify"
8
+ spec.version = Demystify::VERSION
9
+ spec.authors = ["DouglasTGordon"]
10
+ spec.email = ["douglastgordon@gmail.com"]
11
+
12
+ spec.summary = %q{Tools for text analysis and NLP.}
13
+ spec.description = %q{Demystify allows you to extract relevant information from text for easier analysis.}
14
+ spec.homepage = "https://github.com/douglastgordon/Demystify"
15
+
16
+ # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
17
+ # to allow pushing to a single host or delete this section to allow pushing to any host.
18
+ # if spec.respond_to?(:metadata)
19
+ # spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
20
+ # else
21
+ # raise "RubyGems 2.0 or newer is required to protect against " \
22
+ # "public gem pushes."
23
+ # end
24
+
25
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
26
+ f.match(%r{^(test|spec|features)/})
27
+ end
28
+ spec.bindir = "exe"
29
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
+ spec.require_paths = ["lib"]
31
+
32
+ spec.add_development_dependency "bundler", "~> 1.13"
33
+ spec.add_development_dependency "rake", "~> 10.0"
34
+ end
data/lib/demystify/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Demystify
2
+ VERSION = "0.0.2"
3
+ end
data/lib/demystify.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'byebug'
2
+
2
3
  module Demystify
3
4
 
4
5
  #From wikipedia
@@ -19,11 +20,15 @@ module Demystify
19
20
 
20
21
  class Text
21
22
 
22
- attr_accessor :content, :chars
23
+ attr_accessor :content, :chars, :words
23
24
 
24
25
  def initialize(file)
25
26
  @content = open(file).read
26
27
  @chars = @content.split("")
28
+ @words = @content.split(/[^[[:word:]]]+/)
29
+
30
+ @sentences = make_sentences
31
+
27
32
  end
28
33
 
29
34
  def char_count
@@ -96,7 +101,36 @@ module Demystify
96
101
  end
97
102
 
98
103
  def word_count
99
- @content.split(/[^[[:word:]]]+/)
104
+ @words.count
105
+ end
106
+
107
+ def sentence_count
108
+ @sentences.length
109
+ end
110
+
111
+ def first_words_of_sentences
112
+ first_words = []
113
+ @sentences.each do |sentence|
114
+ first_words << sentence.first
115
+ end
116
+ first_words
117
+ end
118
+
119
+ def last_words_of_sentences
120
+ last_words = []
121
+ @sentences.each do |sentence|
122
+ last_words << sentence.last
123
+ end
124
+ last_words
125
+ end
126
+
127
+ private
128
+
129
+ def make_sentences
130
+ sentence_regex = /((?<=[a-z0-9)][.?!])|(?<=[a-z0-9][.?!]"))\s+(?="?[A-Z])/
131
+ sentences = @content.split(sentence_regex)
132
+ sentences.select!{|sentence| sentence.length > 1}
133
+ sentences.map{|sentence| sentence.chomp}
100
134
  end
101
135
 
102
136
  end
data/sample2.txt ADDED
@@ -0,0 +1 @@
1
+ Whale is the common name for a widely distributed and diverse group of fully aquatic placental marine mammals. They are an informal grouping within the infraorder Cetacea, usually excluding dolphins and porpoises. Whales, dolphins and porpoises belong to the order Cetartiodactyla with even-toed ungulates and their closest living relatives are the hippopotamuses, having diverged about 40 million years ago. The two parvorders of whales, baleen whales (Mysticeti) and toothed whales (Odontoceti), are thought to have split apart around 34 million years ago. The whales comprise eight extant families: Balaenopteridae (the rorquals), Balaenidae (right whales), Cetotheriidae (the pygmy right whale), Eschrichtiidae (the gray whale), Monodontidae (belugas and narwhals), Physeteridae (the sperm whale), Kogiidae (the dwarf and pygmy sperm whale), and Ziphiidae (the beaked whales).
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: demystify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - DouglasTGordon
@@ -48,8 +48,12 @@ extra_rdoc_files: []
48
48
  files:
49
49
  - ".gitignore"
50
50
  - Gemfile
51
+ - README.md
52
+ - demystify.gemspec
51
53
  - lib/demystify.rb
54
+ - lib/demystify/version.rb
52
55
  - sample1.txt
56
+ - sample2.txt
53
57
  homepage: https://github.com/douglastgordon/Demystify
54
58
  licenses: []
55
59
  metadata: {}