momblish 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/freq.txt ADDED
File without changes
@@ -0,0 +1,30 @@
1
+ require 'json'
2
+
3
class Momblish
  # Container for the statistics a Momblish generator works from.
  #
  # Both attributes are plain hashes so the corpus can be round-tripped
  # through JSON with {#save} and {.load}.
  class Corpus
    attr_accessor :weighted_bigrams, :occurrences

    # @param weighted_bigrams [Hash] bigram => weight
    # @param occurrences [Hash] bigram => { following letter => weight }
    def initialize(weighted_bigrams = {}, occurrences = {})
      @weighted_bigrams = weighted_bigrams
      @occurrences = occurrences
    end

    # Reads a corpus previously written by {#save}.
    #
    # A key that is absent from the JSON document is treated as an empty
    # hash so the resulting corpus never carries nil attributes.
    #
    # @param path [String] location of the JSON file
    # @return [Corpus]
    def self.load(path)
      parsed = JSON.parse(File.read(path))
      new(parsed['weighted_bigrams'] || {}, parsed['occurrences'] || {})
    end

    # Two corpora are equal when both of their tables are equal.
    # Objects that do not expose the two tables (nil, strings, ...) are
    # simply unequal instead of raising NoMethodError.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:weighted_bigrams) && other.respond_to?(:occurrences)

      @weighted_bigrams == other.weighted_bigrams && @occurrences == other.occurrences
    end

    # Serializes the corpus as JSON to +path+.
    #
    # @param path [String] destination file
    # @return [Integer] whatever File.write returns (bytes written)
    def save(path)
      saved_corpus = {
        weighted_bigrams: @weighted_bigrams,
        occurrences: @occurrences
      }
      File.write(path, JSON.dump(saved_corpus))
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'json'
2
+ require 'debug'
3
+
4
class Momblish
  # Derives a Corpus from a list of words.
  #
  # Two tables are filled in on construction:
  # * +weighted_bigrams+ - relative frequency of each upper-cased two-letter
  #   word prefix;
  # * +occurrences+ - for every upper-cased bigram, the relative frequency
  #   of the letter that follows it.
  #
  # NOTE(review): the file this class lives in also does `require 'debug'`
  # at load time, while the gemspec lists debug only as a development
  # dependency and nothing here uses it - confirm and drop that require.
  class CorpusAnalyzer
    # Characters that disqualify a word (or its prefix) from analysis.
    PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n".split('').freeze

    attr_accessor :words, :corpus

    # @param corpus [Array<String>] raw words, typically the lines of a
    #   dictionary file; trailing whitespace is stripped from each entry
    def initialize(corpus = [])
      @words = corpus.map(&:rstrip)
      @corpus = Corpus.new({}, {})
      init_weighted_bigrams
      init_occurrences
    end

    # Counts the two-letter prefixes of all words longer than two characters
    # whose prefix holds no punctuation, then stores each count divided by
    # the total in +corpus.weighted_bigrams+.
    def init_weighted_bigrams
      counts = Hash.new(0)

      @words.each do |word|
        next unless word.length > 2

        prefix = word[0..1]
        next unless (prefix.chars & PUNCTUATION).empty?

        counts[prefix.upcase] += 1
      end

      total = counts.values.sum.to_f

      counts.each do |bigram, count|
        @corpus.weighted_bigrams[bigram] = count / total
      end
    end

    # Tallies, for every bigram, which letter follows it across all words
    # that contain no punctuation, then stores the normalized tallies in
    # +corpus.occurrences+.
    def init_occurrences
      counts = Hash.new { |hash, bigram| hash[bigram] = Hash.new(0) }

      # Count trigrams directly instead of materializing every trigram in
      # an intermediate array first.
      @words.each do |word|
        letters = word.chomp.upcase.chars
        next if (letters & PUNCTUATION).any?

        letters.each_cons(3) do |first, second, third|
          counts[first + second][third] += 1
        end
      end

      # Reading an unknown bigram yields (and stores) an empty hash, which
      # callers treat as "no known follower".
      @corpus.occurrences = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }

      counts.each do |bigram, followers|
        total = followers.values.sum.to_f
        followers.each do |letter, count|
          @corpus.occurrences[bigram][letter] = count / total
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
class Momblish
  # Version of the released gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.1.0".freeze
end
data/lib/momblish.rb ADDED
@@ -0,0 +1,90 @@
1
+ require "momblish/version"
2
+ require "momblish/corpus_analyzer"
3
+ require "momblish/corpus"
4
+
5
# Generates nonsense words from the statistics held in a Corpus.
#
# A generator is built either from an explicit corpus (+Momblish.new+) or
# through one of the language shortcuts named in DICT (for example
# +Momblish.english+), which analyze a word list found on disk.
class Momblish
  # Library-level error, raised when no dictionary file can be located.
  class Error < StandardError; end

  # Adds Hash#weighted_sample, scoped to this file through a refinement.
  module WeightedSample
    refine Hash do
      # Picks one key at random, treating each value as that key's weight.
      #
      # @return [Object, nil] the chosen key, or nil for an empty hash
      def weighted_sample
        max_by { |_, weight| rand ** (1.0 / weight) }&.first
      end
    end
  end

  using WeightedSample

  # Language name => candidate word-list locations, tried in order.
  DICT = {
    'english' => ['/usr/share/dict/words', '/usr/dict/words', '/usr/share/dict/web2'],
    'simple' => ["#{__dir__}/corpuses/simple.txt"],
    'names' => ['/usr/share/dict/propernames', '/usr/dict/propernames'],
    'spanish' => ["#{__dir__}/corpuses/spanish.txt"]
  }

  # Raised by #initialize when the corpus holds no usable statistics.
  class EmptyCorpusError < StandardError
    attr_reader :message

    def initialize(message)
      # Hand the message to StandardError as well so #to_s, #inspect and
      # #full_message agree with #message.
      super
      @message = message
    end
  end

  class << self
    # Finds the first existing word list for +lang+.
    #
    # @param lang [String] a key of DICT
    # @return [String, nil] path of the first existing file, or nil when
    #   the language is unknown or none of its files exist
    def lookup_dict(lang)
      DICT.fetch(lang, []).find { |location| File.exist?(location) }
    end

    # Turns every DICT key into an argument-less factory method, e.g.
    # +Momblish.english+. Any other call falls through to the default
    # NoMethodError.
    #
    # @raise [Momblish::Error] when no word list for the language exists
    # @return [Momblish]
    def method_missing(lang, *args, &block)
      name = lang.to_s
      return super unless args.empty? && DICT.key?(name)

      dict_file = lookup_dict(name)
      if dict_file.nil?
        raise Error, "No dictionary file found for '#{name}' (looked in: #{DICT[name].join(', ')})"
      end

      new(Momblish::CorpusAnalyzer.new(File.readlines(dict_file)).corpus)
    end

    # Keeps respond_to? / method consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      DICT.key?(name.to_s) || super
    end
  end

  attr_accessor :corpus

  # @param corpus [Corpus, nil] analyzed corpus to generate words from
  # @raise [EmptyCorpusError] when the corpus is missing or either of its
  #   tables is empty
  def initialize(corpus = nil)
    @corpus = corpus || Corpus.new({}, {})

    if @corpus.weighted_bigrams.empty? || @corpus.occurrences.empty?
      raise EmptyCorpusError, 'Your corpus has no words'
    end
  end

  # Builds one lower-cased nonsense word.
  #
  # The word starts from a random starting bigram and grows one letter at a
  # time, each letter drawn by weight from the followers of the current last
  # two letters. Generation stops early when that bigram has no known
  # follower, so the result can be shorter than +length+; it is never
  # shorter than the two-letter starting bigram.
  #
  # @param length [Integer, nil] target length; random in 4..12 when nil
  # @return [String]
  def word(length = nil)
    length ||= rand(4..12)

    # NOTE(review): the starting bigram is drawn uniformly; the weights in
    # weighted_bigrams are not consulted here - confirm whether that is
    # intended.
    word = @corpus.weighted_bigrams.keys.sample

    (length - 2).times do
      last_bigram = word[-2..-1]

      # fetch avoids both a nil entry for plain hashes (corpora loaded from
      # JSON) and triggering a default proc that would grow the corpus.
      followers = @corpus.occurrences.fetch(last_bigram, nil) || {}
      next_letter = followers.weighted_sample

      return word.downcase if next_letter.nil?

      word += next_letter
    end

    word.downcase
  end

  # Produces a series of words.
  #
  # * count, no block   - returns an Array of +count+ words
  # * count and a block - yields +count+ words
  # * block, no count   - yields words until the block breaks
  #
  # @param count [Integer, nil] number of words
  # @param word_length [Integer, nil] passed through to #word
  # @raise [ArgumentError] when neither a count nor a block is given
  def sentence(count = nil, word_length = nil)
    raise ArgumentError, 'You must provide a block or a count' if count.nil? && !block_given?

    return Array.new(count) { word(word_length) } unless block_given?
    return loop { yield word(word_length) } if count.nil?

    count.times { yield word(word_length) }
  end
end
data/momblish.gemspec ADDED
@@ -0,0 +1,29 @@
1
+
2
# Make lib/ loadable so the version constant can be read without the gem
# being installed.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "momblish/version"

Gem::Specification.new do |spec|
  spec.name          = "momblish"
  spec.version       = Momblish::VERSION
  spec.authors       = ["Stephen Prater"]
  spec.email         = ["me@stephenprater.com"]
  spec.license       = "MIT"

  spec.summary       = "Generate nonsense words in any language by corpus analysis"
  spec.homepage      = "https://github.com/stephenprater/momblish-rb"

  # The library relies on Array#sum and the safe-navigation operator, so
  # Ruby 2.4 is the oldest interpreter the code in lib/ can run on.
  spec.required_ruby_version = ">= 2.4"

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files tracked by git; test directories are
  # left out of the packaged gem.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 2.4"
  spec.add_development_dependency "rake", "~> 13.1"
  spec.add_development_dependency "minitest", "~> 5.20"
  spec.add_development_dependency "debug", "~> 1.9"
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: momblish
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Stephen Prater
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-12-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.4'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '13.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.20'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.20'
55
+ - !ruby/object:Gem::Dependency
56
+ name: debug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.9'
69
+ description:
70
+ email:
71
+ - me@stephenprater.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".travis.yml"
78
+ - Gemfile
79
+ - Gemfile.lock
80
+ - README.md
81
+ - Rakefile
82
+ - bin/console
83
+ - bin/setup
84
+ - lib/corpuses/simple.txt
85
+ - lib/corpuses/spanish.txt
86
+ - lib/freq.txt
87
+ - lib/momblish.rb
88
+ - lib/momblish/corpus.rb
89
+ - lib/momblish/corpus_analyzer.rb
90
+ - lib/momblish/version.rb
91
+ - momblish.gemspec
92
+ homepage: https://github.com/stephenprater/momblish-rb
93
+ licenses:
94
+ - MIT
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubygems_version: 3.4.10
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Generate nonsense words in any language by corpus analysis
115
+ test_files: []