textation 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a0fe6300ad5c82008bcfecfc328687b9033f8d2782f6065fe39a1c57d57b25ab
4
+ data.tar.gz: 87ea79f2420f77ecab5139ac777cc90a6052eb3c1d526c9067559f9bc55cb11d
5
+ SHA512:
6
+ metadata.gz: c4e62a7aa9b8268c38201d25119cb4d70aa594af8bde739d6ab0e87b5bde6d37bd163a6a4813e2b0711b3ce1de677b1c5604816a00bbd0224e89079ac96ba4c8
7
+ data.tar.gz: e60d6cfccd32d7e91a679acc6c66728b53f46a7bf52174007e115f75c7c36deb28a30927b9a7cce0d45d2eef7b106fcd96e3854d90bc1d1e4bcf9b32c0970e15
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ Gemfile.lock
10
+ pkg/*
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in textation.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.2"
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 iva mudronja
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,91 @@
1
+ # Textation
2
+
3
+ Simple text analysis with Ruby
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'textation'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle install
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install textation
20
+
21
+ ## Usage
22
+
23
+ ### Create an instance for usage:
24
+
25
+ analyzer = Text.new
26
+
27
+ All methods accept either a path to a .txt file or a plain string.
28
+
29
+ Analyze a .txt file or a string:
30
+
31
+ analyzer.analyze('some_text.txt')
32
+
33
+ works the same way as passing a string 👇
34
+
35
+ analyzer.analyze('I love Ruby! And Ruby loves me!')
36
+
37
+ { character_count: 31,
38
+ character_count_excluding_spaces: 25,
39
+ letter_count: 23,
40
+ line_count: 1,
41
+ word_count: 7,
42
+ sentence_count: 2,
43
+ paragraph_count: 1,
44
+ lines_per_paragraph: "1",
45
+ syllables_per_line: "9",
46
+ average_words_per_sentence: 3.5,
47
+ average_sentences_per_paragraph: 2.0,
48
+ useful_words: "love, ruby, loves",
49
+ percentage_of_useful_words: 42.86,
50
+ unique_words: "i, love, ruby, and, loves, me",
51
+ percentage_of_unique_words: 85.71,
52
+ occurences_of_words: "ruby: 2, i: 1, love: 1, and: 1, loves: 1, me: 1",
53
+ percentage_of_words: "ruby: 28.57, i: 14.29, love: 14.29, and: 14.29, loves: 14.29, me: 14.29"
54
+ }
55
+
56
+
57
+ Get the 3 most frequent words, including stop words (pass a .txt file or a string):
58
+
59
+ analyzer.top_words_all('data/test_source_text.txt')
60
+ #=> 'someone, must, have'
61
+
62
+ You can pass the number of words to return:
63
+
64
+ analyzer.top_words_all('data/test_source_text.txt', 1)
65
+ #=> 'someone'
66
+
67
+
68
+ Get the 3 most frequent words, excluding stop words (pass a .txt file or a string):
69
+
70
+ analyzer.top_words_no_stop_words('data/test_source_text.txt')
71
+ #=> 'slandered, josef, morning'
72
+
73
+ You can pass the number of words to return:
74
+
75
+ analyzer.top_words_no_stop_words('data/test_source_text.txt', 4)
76
+ #=> 'slandered, josef, morning, wrong'
77
+
78
+
79
+ Get the number of occurrences of a word (pass a .txt file or a string):
80
+
81
+ analyzer.occurence_of_word('data/test_source_text.txt', 'morning')
82
+ #=> 5
83
+
84
+ Get the percentage of all words that a given word accounts for (pass a .txt file or a string):
85
+
86
+ analyzer.percentage_of_word('data/test_source_text.txt', 'wrong')
87
+ #=> 5.56
88
+
89
+ ## License
90
+
91
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Define the `spec` task that the default task depends on; without it a bare
# `rake` has no `spec` task to run.
RSpec::Core::RakeTask.new(:spec)

task default: :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "textation"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,6 @@
1
+ Someone must have slandered Josef K., for one morning, without having done anything truly wrong, he was arrested.
2
+ Someone must have slandered Josef K., for one morning, without having done anything truly wrong, he was arrested!
3
+ Someone must have slandered Josef K., for one morning, without having done anything truly wrong, he was arrested?
4
+
5
+ Someone must have slandered Josef K., for one morning, without having done anything truly wrong, he was arrested?!
6
+ Someone must have slandered Josef K., for one morning, without having done anything truly wrong, he was arrested...
@@ -0,0 +1,8 @@
1
+ require "textation/version"
2
+ require "textation/text"
3
+
4
+ module Textation
5
+ class Error < StandardError; end
6
+ # Your code goes here...
7
+ end
8
+
@@ -0,0 +1,97 @@
1
+ require "textation/version"
2
+
3
+ STOP_WORDS = ["a", "a's", "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask", "asking", "associated", "at", "available", "away", "awfully", "b", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's", "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain", "certainly", "changes", "clearly", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "currently", "d", "definitely", "described", "despite", "did", "didn't", "different", "do", "does", "doesn't", "doing", "don't", "done", "down", "downwards", "during", "e", "each", "edu", "eg", "eight", "either", "else", "elsewhere", "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "f", "far", "few", "fifth", "first", "five", "followed", "following", "follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "g", "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't", "happens", "hardly", "has", "hasn't", "have", "haven't", "having", "he", "he's", "hello", "help", "hence", "her", "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "hi", "him", "himself", "his", "hither", 
"hopefully", "how", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar", "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k", "keep", "keeps", "kept", "know", "knows", "known", "l", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like", "liked", "likely", "little", "look", "looking", "looks", "ltd", "m", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "n", "name", "namely", "nd", "near", "nearly", "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone", "nor", "normally", "not", "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall", "own", "p", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provides", "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really", "reasonably", "regarding", "regardless", "regards", "relatively", "respectively", "right", "s", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than", "thank", 
"thanks", "thanx", "that", "that's", "thats", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "there's", "thereafter", "thereby", "therefore", "therein", "theres", "thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", "try", "trying", "twice", "two", "u", "un", "under", "unfortunately", "unless", "unlikely", "until", "unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "uucp", "v", "value", "various", "very", "via", "viz", "vs", "w", "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome", "well", "went", "were", "weren't", "what", "what's", "whatever", "when", "whence", "whenever", "where", "where's", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "who's", "whoever", "whole", "whom", "whose", "why", "will", "willing", "wish", "with", "within", "without", "won't", "wonder", "would", "wouldn't", "x", "y", "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves", "z", "zero"]
4
+
5
+
6
# Computes simple statistics for a piece of text: character, word, line,
# sentence and paragraph counts, word frequencies and a syllable estimate.
#
# Every public method that takes +text+ accepts either the text itself or a
# path ending in ".txt", in which case the file's contents are analysed.
class Text
  # Pattern used to split text into sentences (kept exactly as originally
  # written).
  # NOTE(review): the first alternative only matches dots that follow a
  # character which is not a letter, "{", "1", "}" or "."; a full stop placed
  # directly after a word therefore does not appear to end a sentence.
  # Confirm whether that is intended before relying on :sentence_count.
  SENTENCE_DELIMITER = /[^[:alpha:]{1}\.]\.{1,3}\s?\r?\n?|\?+|!+|\?!+|!\?+/

  # Builds the full report for +text+.
  #
  # @param text [String] text to analyse, or a path ending in ".txt"
  # @return [Hash{Symbol => Object}] counts are Integers, averages and
  #   percentages are Floats rounded to 2 decimals (0.0 when the denominator
  #   is zero), word lists are comma-separated Strings
  def analyze(text)
    # Resolve the input once; the helpers below work on the loaded content so
    # it is never re-interpreted as a file path.
    text = check_input(text)
    word_count = words_in(text).length
    lowered = words_in(text.downcase)
    useful = lowered.reject { |word| STOP_WORDS.include?(word) }.uniq
    unique = lowered.uniq
    counts = rank_by_count(count_words(lowered)).to_h
    total = counts.values.sum
    paragraphs = text.split(/\n\n/)
    sentence_count = text.split(SENTENCE_DELIMITER).length

    {
      character_count: text.length,
      character_count_excluding_spaces: text.gsub(/\s/, '').length,
      letter_count: text.gsub(/[^[:alpha:]]/, '').length,
      line_count: text.split(/\n/).length,
      word_count: word_count,
      sentence_count: sentence_count,
      paragraph_count: paragraphs.length,
      lines_per_paragraph: paragraphs.map { |paragraph| paragraph.split(/\n/).length }.join(', '),
      syllables_per_line: syllable_totals(text).join(', '),
      average_words_per_sentence: ratio(word_count, sentence_count),
      average_sentences_per_paragraph: ratio(sentence_count, paragraphs.length),
      useful_words: useful.join(', '),
      percentage_of_useful_words: percentage(useful.length, word_count),
      occurences_of_words: format_pairs(counts),
      percentage_of_words: format_pairs(counts.transform_values { |count| percentage(count, total) }),
      unique_words: unique.join(', '),
      percentage_of_unique_words: percentage(unique.length, word_count)
    }
  end

  # Lists the distinct lower-cased words of +text+ that are not stop words,
  # in order of first appearance.
  #
  # @param text [String] text or path ending in ".txt"
  # @return [Array<String>]
  def useful_words(text)
    words_in(check_input(text).downcase)
      .reject { |word| STOP_WORDS.include?(word) }
      .uniq
  end

  # Returns the +num+ most frequent words that are not stop words.
  # Words are counted before de-duplication so that frequency actually
  # decides the ranking; ties keep first-appearance order.
  #
  # @param text [String] text or path ending in ".txt"
  # @param num [Integer] how many words to return
  # @return [String] comma-separated words
  def top_words_no_stop_words(text, num = 3)
    words = words_in(check_input(text).downcase)
    top_words(words.reject { |word| STOP_WORDS.include?(word) }, num)
  end

  # Returns the +num+ most frequent words, stop words included.
  #
  # @param text [String] text or path ending in ".txt"
  # @param num [Integer] how many words to return
  # @return [String] comma-separated words
  def top_words_all(text, num = 3)
    top_words(words_in(check_input(text).downcase), num)
  end

  # Ranks an already-split list of words by frequency.
  #
  # @param text [Array<String>] words to rank
  # @param num [Integer] how many words to return
  # @return [String] comma-separated words, most frequent first; ties keep
  #   first-appearance order
  def top_words(text, num)
    rank_by_count(count_words(text)).first(num).map(&:first).join(', ')
  end

  # Counts how often each lower-cased word occurs.
  #
  # @param text [String] text or path ending in ".txt"
  # @return [Hash{String => Integer}] ordered from most to least frequent
  def occurences_of_words(text)
    rank_by_count(count_words(words_in(check_input(text).downcase))).to_h
  end

  # @param text [String] text or path ending in ".txt"
  # @param word [String] word to look up (case-insensitive)
  # @return [Integer, nil] number of occurrences, nil when the word is absent
  def occurence_of_word(text, word)
    occurences_of_words(text)[word.downcase]
  end

  # Computes each word's share of all words, as a percentage.
  #
  # @param text [String] text or path ending in ".txt"
  # @return [Hash{String => Float}] percentages rounded to 2 decimals
  def percentage_of_words(text)
    counts = occurences_of_words(text)
    # The denominator is the number of counted words, so an empty token
    # produced by leading punctuation can no longer skew the result.
    total = counts.values.sum
    counts.transform_values { |count| percentage(count, total) }
  end

  # @param text [String] text or path ending in ".txt"
  # @param word [String] word to look up (case-insensitive)
  # @return [Float, nil] percentage, nil when the word is absent
  def percentage_of_word(text, word)
    percentage_of_words(text)[word.downcase]
  end

  # Estimates the number of syllables on each line by counting vowel groups.
  #
  # @param text [String] text or path ending in ".txt"
  # @return [Array<Integer>] one total per line
  def syllables_per_line(text)
    syllable_totals(check_input(text))
  end

  # Resolves the input: a string ending in ".txt" is treated as a file path
  # and its contents are returned; anything else is returned unchanged.
  #
  # @param text [String]
  # @return [String]
  # @raise [SystemCallError] when the path cannot be read
  def check_input(text)
    # end_with? checks the end of the whole string and a literal ".txt";
    # File.read closes the file once it has been read.
    text.end_with?('.txt') ? File.read(text) : text
  end

  private

  # Splits text on runs of non-word characters, dropping empty tokens.
  def words_in(text)
    text.split(/\W+/).reject(&:empty?)
  end

  # Counts words, keeping keys in order of first appearance.
  def count_words(words)
    words.each_with_object(Hash.new(0)) { |word, counts| counts[word] += 1 }
  end

  # Sorts [word, count] pairs by descending count; ties keep insertion order.
  def rank_by_count(counts)
    counts.each_with_index
          .sort_by { |(_word, count), index| [-count, index] }
          .map(&:first)
  end

  # Renders a hash as "key: value, key: value".
  def format_pairs(pairs)
    pairs.map { |word, value| "#{word}: #{value}" }.join(', ')
  end

  # Division rounded to 2 decimals; 0.0 when the denominator is zero.
  def ratio(numerator, denominator)
    return 0.0 if denominator.zero?

    (numerator.to_f / denominator).round(2)
  end

  # Share of +part+ in +whole+ as a percentage rounded to 2 decimals.
  def percentage(part, whole)
    return 0.0 if whole.zero?

    ((part.to_f / whole) * 100).round(2)
  end

  # Syllable total for every line of already-loaded text.
  def syllable_totals(text)
    text.downcase.split(/\n/).map do |line|
      line.split(/\W+/).map { |word| count_syllables(word) }.sum
    end
  end

  # Counts vowel groups in a word. For words with more than one group, a
  # trailing "e", "es" or "ed" is removed before counting.
  def count_syllables(word)
    groups = vowel_groups(word)
    return groups unless groups > 1

    vowel_groups(word.gsub(/e$|es$|ed$/, ''))
  end

  # Number of runs of consecutive vowels (a, e, i, o, u, y) in a word.
  def vowel_groups(word)
    word.split(/[^aeiouy]+/).reject(&:empty?).length
  end
end
@@ -0,0 +1,3 @@
1
+ module Textation
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,30 @@
1
+ require_relative 'lib/textation/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "textation"
5
+ spec.version = Textation::VERSION
6
+ spec.authors = ["iva"]
7
+ spec.email = ["iva.pilizota@gmail.com"]
8
+
9
+ spec.summary = "Simple text analysis with Ruby" #%q{TODO: Write a short summary, because RubyGems requires one.}
10
+ # spec.description = %q{TODO: Write a longer description or delete this line.}
11
+ spec.homepage = "https://github.com/ivaMm/textation" #{}"TODO: Put your gem's website or public repo URL here."
12
+ spec.license = "MIT"
13
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
+
15
+ #spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
16
+
17
+ spec.metadata["homepage_uri"] = spec.homepage
18
+ spec.metadata["source_code_uri"] = "https://github.com/ivaMm/textation" # "TODO: Put your gem's public repo URL here."
19
+ # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
20
+
21
+ # Specify which files should be added to the gem when it is released.
22
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
23
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
24
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ end
26
+ spec.bindir = "exe"
27
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ spec.require_paths = ["lib"]
29
+ spec.add_development_dependency "rspec", "~> 3.2"
30
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textation
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - iva
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-12-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.2'
27
+ description:
28
+ email:
29
+ - iva.pilizota@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".gitignore"
35
+ - ".travis.yml"
36
+ - Gemfile
37
+ - Gemfile.lock
38
+ - LICENSE.txt
39
+ - README.md
40
+ - Rakefile
41
+ - bin/console
42
+ - bin/setup
43
+ - data/test_source_text.txt
44
+ - lib/textation.rb
45
+ - lib/textation/text.rb
46
+ - lib/textation/version.rb
47
+ - textation.gemspec
48
+ homepage: https://github.com/ivaMm/textation
49
+ licenses:
50
+ - MIT
51
+ metadata:
52
+ homepage_uri: https://github.com/ivaMm/textation
53
+ source_code_uri: https://github.com/ivaMm/textation
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 2.3.0
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubygems_version: 3.1.2
70
+ signing_key:
71
+ specification_version: 4
72
+ summary: Simple text analysis with Ruby
73
+ test_files: []