demystify 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5971ce18068cb4b0be4e3089c81e8d1aeda25d93
4
- data.tar.gz: bfc5e6fd91f9a7dd292384cdc2b6bb9ccf43212c
3
+ metadata.gz: 70032c0a2eaf6eeecedc921876cf1760e160c7fb
4
+ data.tar.gz: d855804fb3f43777a71cac79dd8a54b5bd990896
5
5
  SHA512:
6
- metadata.gz: 261f97b8e0274588b73844ce2aa17d8a0d64b377cdf9420ec76d3160326909f9188104d1b27b7256334e2385a046a64f6b13c94cd6ad5a42f7fbb6632be5a301
7
- data.tar.gz: f6c8113b838cd63cddb2eaa265a394d5c15b05fcac98e7ebcac06fadfc56d1e34df58b64a159221514e9578a3d06c4840f72ea8f0b7b67dc83907716e3063343
6
+ metadata.gz: 64e2cc110909035a29df6e05e46235a0bcc7db8faf4ea8c412a25149da1a03af1684bf887d7e995ea25afafbffacfd3ceedd6de469eb0cb7d77c8dfad3d2b607
7
+ data.tar.gz: 43d451c3267e88b6aa1bda6792310066a4d1666807fdb3bf5741eb138e299db508f23bd211a5c88a5cd7a7f3eb241e543ed6ca95ad2a87f789cc1fdb60f47192
data/README.md CHANGED
@@ -1 +1,65 @@
1
1
  # Demystify
2
+
3
+ Demystify is a gem to help you deal with text, for text analysis or NLP projects.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'demystify'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install demystify
20
+
21
+ ## Usage
22
+
23
+ Make a Text object using your text file.
24
+ ```ruby
25
+ text = Demystify::Text.new('./my_text_file.txt')
26
+ ```
27
+
28
+ Get an array of all characters, words or sentences:
29
+ ```ruby
30
+ text.chars
31
+ text.words
32
+ text.sentences
33
+ ```
34
+
35
+ Count the number of all characters, spaces, new lines, non-whitespace characters,
36
+ punctuation, symbols, letters, non-letters, words and sentences:
37
+ ```ruby
38
+ text.char_count
39
+ text.spaces_count
40
+ text.new_line_count
41
+ text.non_whitespace_char_count
42
+ text.punctuation_count
43
+ text.symbol_count
44
+ text.letter_count
45
+ text.non_letter_count
46
+ text.word_count
47
+ text.sentence_count
48
+ ```
49
+
50
+ Check for the number of occurrences of a particular sequence of characters:
51
+ ```ruby
52
+ text.sequence_count(sequence)
53
+ ```
54
+
55
+ Get the first word or last word of every sentence in an array:
56
+ ```ruby
57
+ text.first_words
58
+ text.last_words
59
+ ```
60
+
61
+ Get a hash mapping every word in the text to an array of all the words that follow or precede it in the text:
62
+ ```ruby
63
+ text.forwards_probability_hash
64
+ text.backwards_probability_hash
65
+ ```
@@ -1,3 +1,3 @@
1
1
  module Demystify
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/demystify.rb CHANGED
@@ -20,15 +20,22 @@ module Demystify
20
20
 
21
21
  class Text
22
22
 
23
- attr_accessor :content, :chars, :words
23
+ attr_accessor :content,
24
+ :chars,
25
+ :words,
26
+ :sentences,
27
+ :forwards_probability_hash,
28
+ :backwards_probability_hash,
29
+ :first_words,
30
+ :last_words
24
31
 
25
32
  def initialize(file)
26
33
  @content = open(file).read
27
34
  @chars = @content.split("")
28
35
  @words = @content.split(/[^[[:word:]]]+/)
29
-
30
- @sentences = make_sentences
31
-
36
+ make_sentences
37
+ make_probability_hashes
38
+ make_first_and_last_words
32
39
  end
33
40
 
34
41
  def char_count
@@ -108,33 +115,41 @@ module Demystify
108
115
  @sentences.length
109
116
  end
110
117
 
111
- def first_words_of_sentences
112
- first_words = []
113
- @sentences.each do |sentence|
114
- first_words << sentence.first
115
- end
116
- first_words
117
- end
118
+ private
118
119
 
119
- def last_words_of_sentences
120
- last_words = []
120
+ def make_first_and_last_words
121
+ @first_words = []
122
+ @last_words = []
121
123
  @sentences.each do |sentence|
122
- last_words << sentence.last
124
+ split_sentence = sentence.split(" ")
125
+ @first_words << split_sentence.first
126
+ @last_words << split_sentence.last
123
127
  end
124
- last_words
125
128
  end
126
129
 
127
- private
128
-
129
130
  def make_sentences
130
131
  sentence_regex = /((?<=[a-z0-9)][.?!])|(?<=[a-z0-9][.?!]"))\s+(?="?[A-Z])/
131
132
  sentences = @content.split(sentence_regex)
132
133
  sentences.select!{|sentence| sentence.length > 1}
133
- sentences.map{|sentence| sentence.chomp}
134
+ @sentences = sentences.map{|sentence| sentence.chomp}
135
+ end
136
+
137
+ def make_probability_hashes
138
+ @forwards_probability_hash = Hash.new { |h, k| h[k] = [] }
139
+ @backwards_probability_hash = Hash.new { |h, k| h[k] = [] }
140
+ @sentences.each do |sentence|
141
+ sentence_array = sentence.split(" ")
142
+ sentence_array.each_with_index do |word, i|
143
+ unless i == sentence_array.length - 1
144
+ @forwards_probability_hash[word] << sentence_array[i+1]
145
+ end
146
+ unless i == 0
147
+ @backwards_probability_hash[word] << sentence_array[i-1]
148
+ end
149
+ end
150
+ end
134
151
  end
135
152
 
136
153
  end
137
154
 
138
155
  end
139
-
140
- something = Demystify::Text.new(File.join( File.dirname(__FILE__), '../sample1.txt'))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: demystify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - DouglasTGordon