momblish 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/freq.txt ADDED
File without changes
@@ -0,0 +1,30 @@
1
+ require 'json'
2
+
3
class Momblish
  # Value object holding the two tables a generator samples from:
  # +weighted_bigrams+ and +occurrences+. Both are stored as given; this
  # class only carries them around and (de)serializes them as JSON.
  class Corpus
    attr_accessor :weighted_bigrams, :occurrences

    # @param weighted_bigrams [Hash] table of starting bigrams
    # @param occurrences [Hash] table of follow-up letters per bigram
    def initialize(weighted_bigrams = {}, occurrences = {})
      @weighted_bigrams = weighted_bigrams
      @occurrences = occurrences
    end

    # Reads a corpus previously written by #save.
    #
    # A missing or null table in the JSON document becomes an empty hash
    # instead of nil, so the returned corpus always answers +empty?+.
    #
    # @param path [String] location of the JSON file
    # @return [Corpus]
    def self.load(path)
      parsed = JSON.parse(File.read(path))
      new(parsed['weighted_bigrams'] || {}, parsed['occurrences'] || {})
    end

    # Two corpora are equal when both of their tables are equal.
    # Anything that does not expose both tables (nil, strings, ...) is
    # simply unequal rather than raising NoMethodError.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:weighted_bigrams) && other.respond_to?(:occurrences)

      @weighted_bigrams == other.weighted_bigrams && @occurrences == other.occurrences
    end

    # Writes both tables to +path+ as a single JSON object.
    #
    # @param path [String] destination file
    # @return [Integer] whatever File.write returns (bytes written)
    def save(path)
      saved_corpus = {
        weighted_bigrams: @weighted_bigrams,
        occurrences: @occurrences
      }
      File.write(path, JSON.dump(saved_corpus))
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'json'
2
+ require 'debug'
3
+
4
class Momblish
  # Derives a Corpus from a list of words.
  #
  # Two tables are produced:
  # * +weighted_bigrams+ - upper-cased first two letters of each word mapped to
  #   the fraction of counted words that start with them;
  # * +occurrences+ - upper-cased bigram mapped to a hash of
  #   "letter that followed it" => relative frequency.
  #
  # NOTE(review): this file requires 'debug' at load time although the gemspec
  # only lists debug as a development dependency - confirm that is intended.
  class CorpusAnalyzer
    # Characters that disqualify a word (or its leading bigram) from analysis.
    PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n".chars.freeze

    attr_accessor :words, :corpus

    # @param corpus [Array<String>] raw words/lines; trailing whitespace is stripped
    def initialize(corpus = [])
      @words = corpus.map(&:rstrip)
      @corpus = Corpus.new({}, {})
      init_weighted_bigrams
      init_occurrences
    end

    # Fills +corpus.weighted_bigrams+.
    #
    # Only words longer than two characters whose first two characters contain
    # no punctuation are counted. Existing entries are overwritten, not cleared.
    def init_weighted_bigrams
      counts = Hash.new(0)

      @words.each do |word|
        next unless word.length > 2

        start = word[0..1]
        next if punctuated?(start.chars)

        counts[start.upcase] += 1
      end

      total = counts.values.sum

      counts.each do |bigram, count|
        @corpus.weighted_bigrams[bigram] = count.to_f / total
      end
    end

    # Replaces +corpus.occurrences+ with a freshly computed table.
    #
    # Words containing any punctuation character are skipped entirely. The
    # counts are gathered in one pass over the words, so no intermediate list
    # of every trigram in the corpus is materialized.
    def init_occurrences
      counts = Hash.new { |h, k| h[k] = Hash.new(0) }

      @words.each do |word|
        letters = word.chomp.upcase.chars
        next if punctuated?(letters)

        letters.each_cons(3) do |first, second, third|
          counts["#{first}#{second}"][third] += 1
        end
      end

      # The published table keeps the auto-creating default block so lookups
      # of unknown bigrams yield an (empty) hash rather than nil.
      @corpus.occurrences = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }

      counts.each do |bigram, last_letters|
        total = last_letters.values.sum.to_f
        last_letters.each do |last_letter, count|
          @corpus.occurrences[bigram][last_letter] = count / total
        end
      end
    end

    private

    # True when any of +chars+ is a PUNCTUATION character.
    def punctuated?(chars)
      (chars & PUNCTUATION).any?
    end
  end
end
@@ -0,0 +1,3 @@
1
class Momblish
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
data/lib/momblish.rb ADDED
@@ -0,0 +1,90 @@
1
+ require "momblish/version"
2
+ require "momblish/corpus_analyzer"
3
+ require "momblish/corpus"
4
+
5
# Generates nonsense words by walking a Corpus: a word starts with a bigram
# drawn from +corpus.weighted_bigrams+ and grows one letter at a time, each
# letter drawn from +corpus.occurrences+ for the word's last two letters.
class Momblish
  # Base class for errors raised by this library.
  class Error < StandardError; end

  # Refinement adding Hash#weighted_sample.
  module WeightedSample
    refine Hash do
      # Picks one key at random, treating each value as that key's weight.
      #
      # @return [Object, nil] the chosen key, or nil when the hash is empty
      def weighted_sample
        max_by { |_, weight| rand ** (1.0 / weight) }&.first
      end
    end
  end

  using WeightedSample

  # Dictionary name => candidate file locations, tried in order.
  DICT = {
    'english' => ['/usr/share/dict/words', '/usr/dict/words', '/usr/share/dict/web2'],
    'simple' => ["#{__dir__}/corpuses/simple.txt"],
    'names' => ['/usr/share/dict/propernames', '/usr/dict/propernames'],
    'spanish' => ["#{__dir__}/corpuses/spanish.txt"]
  }.freeze

  # Raised when a generator is built from a corpus with an empty table.
  # Delegates the message to StandardError so #message, #to_s and #inspect
  # all report it.
  class EmptyCorpusError < Error; end

  class << self
    # Finds the first existing file for a named dictionary.
    #
    # @param lang [String, Symbol] a DICT key
    # @return [String, nil] path of the first candidate that exists, or nil
    #   when none exists or the name is unknown
    def lookup_dict(lang)
      DICT.fetch(lang.to_s, []).find { |location| File.exist?(location) }
    end

    # Makes every DICT key available as a constructor, e.g. +Momblish.english+,
    # which analyzes the dictionary file and returns a generator for it.
    #
    # @raise [ArgumentError] when arguments are passed to a dictionary constructor
    # @raise [Error] when none of the dictionary's candidate files exists
    # @raise [NoMethodError] for names that are not DICT keys
    def method_missing(name, *args, &block)
      lang = name.to_s
      return super unless DICT.key?(lang)

      raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0)" unless args.empty?

      dict_file = lookup_dict(lang)
      if dict_file.nil?
        raise Error, "No dictionary file found for '#{lang}' (looked in: #{DICT[lang].join(', ')})"
      end

      new(Momblish::CorpusAnalyzer.new(File.readlines(dict_file)).corpus)
    end

    # Keeps respond_to? truthful about the constructors method_missing provides.
    def respond_to_missing?(name, include_private = false)
      DICT.key?(name.to_s) || super
    end
  end

  attr_accessor :corpus

  # @param corpus [Corpus, nil] tables to sample from; nil means an empty corpus
  # @raise [EmptyCorpusError] when either table of the corpus is empty
  def initialize(corpus = nil)
    @corpus = corpus || Corpus.new({}, {})

    return unless @corpus.weighted_bigrams.empty? || @corpus.occurrences.empty?

    raise EmptyCorpusError, 'Your corpus has no words'
  end

  # Builds one lower-cased nonsense word.
  #
  # The word always begins with a two-letter starting bigram, so it is never
  # shorter than two letters; it may come out shorter than +length+ when the
  # current last two letters have no recorded follow-up letter.
  #
  # @param length [Integer, nil] target length; a random value in 4..12 when nil
  # @return [String]
  def word(length = nil)
    length ||= rand(4..12)

    # Draw the start according to its weight. Hash keys are frozen strings,
    # hence the dup before appending.
    word = @corpus.weighted_bigrams.weighted_sample.dup

    (length - 2).times do
      # fetch bypasses any default block, so reading an unknown bigram neither
      # raises on a plain (e.g. JSON-loaded) hash nor inserts an empty entry
      # into an auto-creating one.
      next_letter = @corpus.occurrences.fetch(word[-2..-1], nil)&.weighted_sample
      break if next_letter.nil?

      word << next_letter
    end

    word.downcase
  end

  # Produces several words.
  #
  # With a block, yields +count+ words (or words forever when +count+ is nil).
  # Without a block, returns an array of +count+ words.
  #
  # @param count [Integer, nil] number of words
  # @param word_length [Integer, nil] passed through to #word
  # @raise [ArgumentError] when neither a count nor a block is given
  def sentence(count = nil, word_length = nil)
    raise ArgumentError, 'You must provide a block or a count' if count.nil? && !block_given?

    return Array.new(count) { word(word_length) } unless block_given?
    return count.times { yield word(word_length) } unless count.nil?

    loop { yield word(word_length) }
  end
end
data/momblish.gemspec ADDED
@@ -0,0 +1,29 @@
1
+
2
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "momblish/version"

Gem::Specification.new do |spec|
  spec.name = "momblish"
  spec.version = Momblish::VERSION
  spec.authors = ["Stephen Prater"]
  spec.email = ["me@stephenprater.com"]
  spec.license = "MIT"

  spec.summary = "Generate nonsense words in any language by corpus analysis"
  spec.homepage = "https://github.com/stephenprater/momblish-rb"

  # The library relies on the safe-navigation operator and Array#sum, so it
  # cannot run on Rubies older than 2.4; declare that instead of ">= 0".
  spec.required_ruby_version = ">= 2.4.0"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    # \A anchors at the start of the path itself rather than at any line start.
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(test|spec|features)/}) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.4"
  spec.add_development_dependency "rake", "~> 13.1"
  spec.add_development_dependency "minitest", "~> 5.20"
  spec.add_development_dependency "debug", "~> 1.9"
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: momblish
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Stephen Prater
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-12-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.4'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '13.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.20'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.20'
55
+ - !ruby/object:Gem::Dependency
56
+ name: debug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.9'
69
+ description:
70
+ email:
71
+ - me@stephenprater.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".travis.yml"
78
+ - Gemfile
79
+ - Gemfile.lock
80
+ - README.md
81
+ - Rakefile
82
+ - bin/console
83
+ - bin/setup
84
+ - lib/corpuses/simple.txt
85
+ - lib/corpuses/spanish.txt
86
+ - lib/freq.txt
87
+ - lib/momblish.rb
88
+ - lib/momblish/corpus.rb
89
+ - lib/momblish/corpus_analyzer.rb
90
+ - lib/momblish/version.rb
91
+ - momblish.gemspec
92
+ homepage: https://github.com/stephenprater/momblish-rb
93
+ licenses:
94
+ - MIT
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubygems_version: 3.4.10
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Generate nonsense words in any language by corpus analysis
115
+ test_files: []