demystify 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/lib/demystify.rb +106 -0
- data/sample1.txt +4 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6401829101cacafbc3bbd667bbc6f7003fd9e1df
|
4
|
+
data.tar.gz: 8558b8f978dcc94dd685acf04e5b62ccbfd90ebe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aa98c469ade8a8ebaa75b6cbe74a6c4fbb822f1822ba8be580eff1a90f58ca20039c6cff0076a306a32eca9858a9028f22851dc94a128c27a668a0176c6d2009
|
7
|
+
data.tar.gz: 0b1a2840b3e6564d777890959f016ab4cee51fdaff444e91e1ef962f296b4c1e475322c889ba6636c6e6ceba2d3802abf4238a8cc146108f0439a6f4f5e1a342
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/lib/demystify.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'byebug'
|
2
|
+
# Demystify extracts simple character-level statistics from a text file.
module Demystify

  # Punctuation marks (list taken from Wikipedia).
  # NOTE(review): ';' is in neither this list nor Symbols, so it is currently
  # treated as a letter by Text#letter_count -- confirm whether that is intended.
  Punctuation = ['[', ']', '(', ')', '{', '}', '⟨', '⟩', ':', ',', '،',
                 '、', '‒', '–', '—', '―', '…', '!', '.', '‹', '›', '«',
                 '»', '‐', '-', '?', "‘", "’", "“", "”", "'", "\""].freeze

  # Typographic symbols that are neither letters nor punctuation.
  Symbols = ['&', '*', '@', '\\', '/', '•', '^', '†', '‡', '°', '″', '¡', '¿',
             '※', '#', '№', '÷', '×', 'º', 'ª', '%', '‰', '+', '−', '=',
             '‱', '¶', '′', '″', '‴', '§', '~', '_', '|', '‖', '¦', '⁂',
             '❧', '☞', '‽', '⸮', '◊', '⁀'].freeze

  # Intellectual-property marks.
  Intellectual_property = ['©', '℗', '®', '℠', '™'].freeze

  # Currency = ["¤", "₳", "฿", "₵", "¢", "₡", "₢", "$", "₫", "₯", "₠", "€",
  #             "ƒ", "₣", "₲", "₴", "₭", "₺", "ℳ", "₥", "₦", "₧", "₱", "₰",
  #             "£", "៛", "₨", "₪", "৳", "₮", "₩", "¥"]

  # A text loaded from a file, with counting helpers over its characters.
  class Text

    # content: the whole file as one String.
    # chars:   the same text split into an Array of one-character Strings.
    attr_accessor :content, :chars

    # Reads +file+ (a path) into memory.
    #
    # File.read is used instead of Kernel#open so the file handle is closed
    # immediately and a path starting with "|" is never run as a command.
    def initialize(file)
      @content = File.read(file)
      @chars = @content.chars
    end

    # Total number of characters, whitespace included.
    def char_count
      @chars.length
    end

    # Number of plain space characters (" "); tabs are not counted.
    def spaces_count
      @chars.count(" ")
    end

    # Number of "\n" characters.
    def new_line_count
      @chars.count("\n")
    end

    # Characters that are neither a space nor a newline.
    def non_whitespace_char_count
      char_count - (spaces_count + new_line_count)
    end

    # Number of characters listed in Punctuation.
    def punctuation_count
      @chars.count { |char| Punctuation.include?(char) }
    end

    # Number of characters listed in Symbols.
    def symbol_count
      @chars.count { |char| Symbols.include?(char) }
    end

    # Number of characters listed in Intellectual_property.
    def intellectual_property_count
      @chars.count { |char| Intellectual_property.include?(char) }
    end

    # Punctuation, symbols and intellectual-property marks combined.
    def non_letter_count
      punctuation_count + symbol_count + intellectual_property_count
    end

    # Characters left after removing spaces, newlines and every character
    # counted by non_letter_count. Punctuation is subtracted too, so that
    # letter_count and non_letter_count never count the same character.
    # Anything not in one of the lists (digits, tabs, ...) still counts here.
    def letter_count
      char_count - (spaces_count + new_line_count + non_letter_count)
    end

    # Number of (possibly overlapping) occurrences of the String +sequence+
    # in the text, including an occurrence that ends at the last character.
    # Returns 0 for an empty sequence.
    def sequence_count(sequence)
      return 0 if sequence.empty?

      # each_cons yields every window of the given width, including the final
      # one, and yields nothing when the text is shorter than the sequence.
      @chars.each_cons(sequence.length).count { |window| window.join == sequence }
    end

    # The words of the text: maximal runs of [[:word:]] characters.
    # Never contains empty strings.
    def words
      @content.scan(/[[:word:]]+/)
    end

    # Number of words in the text (an Integer, like the other *_count methods).
    def word_count
      words.length
    end

  end

end
|
105
|
+
|
106
|
+
# Demo only: build a Text from the bundled sample when this file is run
# directly (`ruby lib/demystify.rb`). Guarded so that requiring the library
# does no file I/O and cannot fail when sample1.txt is absent.
if $PROGRAM_NAME == __FILE__
  something = Demystify::Text.new(File.join( File.dirname(__FILE__), '../sample1.txt'))
end
|
data/sample1.txt
ADDED
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: demystify
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- DouglasTGordon
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-12-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
description: Demystify allows you to extract relevant information from text for easier
|
42
|
+
analysis.
|
43
|
+
email:
|
44
|
+
- douglastgordon@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- Gemfile
|
51
|
+
- lib/demystify.rb
|
52
|
+
- sample1.txt
|
53
|
+
homepage: https://github.com/douglastgordon/Demystify
|
54
|
+
licenses: []
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 2.5.1
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: Tools for text analysis and NLP.
|
76
|
+
test_files: []
|