ruby_tokenizer 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 9159d68232eb0ceada325a52566e93ecfb4fc3ff
-   data.tar.gz: 3367ad1e921f3fdf894a0acfec6f7d8b995aac7c
+   metadata.gz: 536056f6442aa02f631f5636c83136a749a6780c
+   data.tar.gz: d1af87a98c8cf3d3792f08fe79386b22abd2ef77
  SHA512:
-   metadata.gz: 171dfde354ee7916ad10ca8161a1169d4fb1e95eea854e44ad80c46660dc246c78f382bbb018250209dbe674ad66dafffba8821de6a8b7647e4a23be40a6821c
-   data.tar.gz: c5bf1cdb2c95e6400443a6bd2d4af5b20787d23650f643ee2ff80968ed137c740eeb36db08ab5f02de9f1343432c2b9f7bf83929071d140c588a694ba8146af2
+   metadata.gz: d49e6a5b9d7cc069e144242389c0523cfe843a06a3873d04fb6e420107e9a528f170478305c4ab79c6fd7626bc821d54c927a6a496e82ceb1a674bd98aac6ed8
+   data.tar.gz: a4559c6d39ecf48e43b08af90d1d4f746ab1b88b15586e9ebb9dc49be5dc5cdf32732d61ee8ec695212a34df05b73b34d8c00f006ce0cef5db3f83bc878a683e
bin/tokenizer CHANGED
@@ -1,19 +1,11 @@
  #!/usr/bin/env ruby
 
  require 'ruby_tokenizer'
+ require 'input_stream_module/input_stream.rb'
  require 'pry'
 
- if ARGV.empty?
-   puts "--- Please input your text below ----"
-   text = STDIN.gets
- elsif ARGV.detect { |input| File.file?(input) == false }
-   puts "File not found: Please try again."
-   exit 2
- else
-   text = ARGF.read
- end
-
- input = RubyTokenizer::Tokenizer.new(text)
- top_ten = input.rank
+ input = InputStream.analyze(ARGV)
+ output = RubyTokenizer::Tokenizer.new(input)
+ top_ten = output.rank
 
  Pry::ColorPrinter.pp(top_ten)
lib/input_stream_module/input_stream.rb ADDED
@@ -0,0 +1,28 @@
+ module InputStream
+
+   def self.analyze(input)
+     if input.empty?
+       self.prompt("--- Please input your text below ----")
+       self.get_input
+     else
+       self.read_file(input)
+     end
+   end
+
+   def self.prompt(string)
+     puts string
+   end
+
+   def self.get_input
+     STDIN.gets
+   end
+
+   def self.read_file(input)
+     if input.detect { |path| File.file?(path) == false }
+       raise LoadError, "File not found: Please try again."
+       exit 2
+     else
+       ARGF.read
+     end
+   end
+ end
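The executable now delegates all argument handling to this module. Below is an illustrative sketch (not part of the gem) of how it is meant to be driven, assuming the gem is installed so its lib/ directory is on the load path; note that read_file ultimately reads ARGF, which consumes the global ARGV, so the module expects to be handed ARGV itself.

    require 'input_stream_module/input_stream'

    # With no arguments, analyze prints the prompt and reads one line from STDIN.
    # With file arguments, it raises LoadError if any path is not a regular file;
    # otherwise it reads the files' contents through ARGF.
    text = InputStream.analyze(ARGV)
    puts text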
lib/patterns_module/patterns.rb CHANGED
@@ -1,11 +1,11 @@
  module Patterns
-   PUNCTUATION = [':', ',', '—', '!', '?', ';', '.', '"']
+   PUNCTUATION = [':', ',', '—', '!', '?', ';', '"']
    PARENS = ['{', '[', '}', ']', '(', ')']
    MISC = ['<', '«', '„', '>', '»', '“', '*']
    # CONJUNCTIONS = ['for ', 'and ', 'nor ', 'but ', 'or ', 'yet ', 'so ', 'as ']
    # DETERMINERS = ['the ', 'this ', 'that ']
 
-   BASIC = PUNCTUATION + MISC
+   BASIC = PUNCTUATION + PARENS + MISC
 
    def self.basic
      Regexp.union(BASIC)
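A rough illustration of this change (hypothetical snippet, not part of the gem, assuming its lib/ directory is on the load path): bracket characters are now caught by Patterns.basic, while a bare period no longer is.

    require 'patterns_module/patterns'

    # Brackets are stripped along with the other punctuation; '.' is left alone,
    # since the tokenizer now trims trailing dots itself.
    "call(me) maybe!".gsub(Patterns.basic, ' ')   # => "call me  maybe "
    "v0.1.3".gsub(Patterns.basic, ' ')            # => "v0.1.3"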
lib/ruby_tokenizer.rb CHANGED
@@ -1,6 +1,5 @@
  require "ruby_tokenizer/version"
  require "patterns_module/patterns"
- require 'pry'
 
  module RubyTokenizer
 
@@ -20,7 +19,11 @@ module RubyTokenizer
      protected
 
      def filter
-       text.downcase.gsub(Patterns.basic, '')
+       text.downcase.gsub(Patterns.basic, ' ')
+     end
+
+     def tokenize
+       self.filter.scan(/[-\w'’.@]+/).map { |token| token.gsub(/[._-]$/, '') }
      end
 
      def frequency
@@ -30,9 +33,5 @@ module RubyTokenizer
 
        return count
      end
-
-     def tokenize
-       self.filter.scan(/[-\w'’]+/)
-     end
    end
  end
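Combined with the Patterns change above, tokens can now keep interior dots, hyphens and '@' signs, while a trailing '.', '_' or '-' is trimmed. A hypothetical snippet (not part of the gem; filter and tokenize are protected, so it peeks at the result with send):

    require 'ruby_tokenizer'

    tokenizer = RubyTokenizer::Tokenizer.new("Hello, world. Hello e-mail@example.com!")
    tokenizer.send(:tokenize)
    # => ["hello", "world", "hello", "e-mail@example.com"]
    # "world." keeps its dot through filter and loses it in the trailing trim;
    # the address survives because '.' and '@' are allowed inside tokens.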
lib/ruby_tokenizer/version.rb CHANGED
@@ -1,3 +1,3 @@
  module RubyTokenizer
-   VERSION = "0.1.2"
+   VERSION = "0.1.3"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: ruby_tokenizer
  version: !ruby/object:Gem::Version
-   version: 0.1.2
+   version: 0.1.3
  platform: ruby
  authors:
  - irinarenteria
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-02-26 00:00:00.000000000 Z
+ date: 2016-03-03 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: pry
@@ -92,6 +92,7 @@ extensions: []
  extra_rdoc_files: []
  files:
  - bin/tokenizer
+ - lib/input_stream_module/input_stream.rb
  - lib/patterns_module/patterns.rb
  - lib/ruby_tokenizer.rb
  - lib/ruby_tokenizer/version.rb