momblish 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +7 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +40 -0
- data/README.md +64 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/corpuses/simple.txt +1001 -0
- data/lib/corpuses/spanish.txt +86059 -0
- data/lib/freq.txt +0 -0
- data/lib/momblish/corpus.rb +30 -0
- data/lib/momblish/corpus_analyzer.rb +65 -0
- data/lib/momblish/version.rb +3 -0
- data/lib/momblish.rb +90 -0
- data/momblish.gemspec +29 -0
- metadata +115 -0
data/lib/freq.txt
ADDED
File without changes
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
class Momblish
  # Holds the letter statistics a Momblish generator works from.
  #
  # weighted_bigrams - Hash mapping a two-letter word start to its weight.
  # occurrences      - Hash mapping a bigram to a Hash of
  #                    { following letter => relative frequency }.
  class Corpus
    attr_accessor :weighted_bigrams, :occurrences

    # Builds a corpus from already-computed statistics; both default to
    # empty hashes.
    def initialize(weighted_bigrams = {}, occurrences = {})
      @weighted_bigrams = weighted_bigrams
      @occurrences = occurrences
    end

    # Reads a corpus previously written by #save.
    #
    # path - location of the JSON file.
    #
    # Returns a new Corpus. Raises whatever File.read / JSON.parse raise for
    # an unreadable or malformed file.
    def self.load(path)
      parsed = JSON.parse(File.read(path))
      new(parsed['weighted_bigrams'], parsed['occurrences'])
    end

    # Two corpora are equal when both statistic tables are equal.
    # Anything that is not a Corpus (nil included) is simply unequal; it
    # must not raise NoMethodError.
    def ==(other)
      other.is_a?(Corpus) &&
        @weighted_bigrams == other.weighted_bigrams &&
        @occurrences == other.occurrences
    end

    # Serialises both tables as one JSON object and writes it to +path+.
    #
    # Returns the number of bytes written (the result of File.write).
    def save(path)
      saved_corpus = {
        weighted_bigrams: @weighted_bigrams,
        occurrences: @occurrences
      }
      File.write(path, JSON.dump(saved_corpus))
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'json'

# `debug` is only a development dependency of this gem, so it may not be
# installed where the library is consumed. Load it when available, but do
# not let its absence prevent the library itself from loading.
begin
  require 'debug'
rescue LoadError
  nil
end
|
3
|
+
|
4
|
+
class Momblish
  # Derives the statistics stored in a Corpus from a list of words:
  # how often each two-letter word start occurs, and which letter tends to
  # follow each bigram.
  class CorpusAnalyzer
    # Characters that disqualify a word (or a word start) from analysis.
    PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n".chars.freeze

    attr_accessor :words, :corpus

    # corpus - Enumerable of strings, one word per element (for example the
    #          result of File.readlines); trailing whitespace is stripped.
    #
    # The analysis runs immediately; the result is available via #corpus.
    def initialize(corpus = [])
      @words = corpus.map(&:rstrip)
      @corpus = Corpus.new({}, {})
      init_weighted_bigrams
      init_occurrences
    end

    # Fills corpus.weighted_bigrams with { upcased first two letters =>
    # share of analysed words starting with them }. Only words longer than
    # two characters whose first two characters are not punctuation count.
    def init_weighted_bigrams
      starting_bigrams = Hash.new(0)

      @words.each do |word|
        next unless word.length > 2 && (word[0..1].chars & PUNCTUATION).empty?

        starting_bigrams[word[0..1].upcase] += 1
      end

      total = starting_bigrams.values.sum

      starting_bigrams.each do |bigram, count|
        @corpus.weighted_bigrams[bigram] = count.to_f / total
      end
    end

    # Fills corpus.occurrences with { bigram => { next letter => relative
    # frequency } }, computed over every three-letter window of every word
    # that contains no punctuation at all.
    def init_occurrences
      counts = Hash.new { |h, k| h[k] = Hash.new(0) }

      # Count in a single pass instead of first collecting every trigram of
      # the whole corpus into one array and regrouping it.
      @words.each do |word|
        letters = word.chomp.upcase.chars
        next if (letters & PUNCTUATION).any?

        letters.each_cons(3) do |first, second, last|
          counts[first + second][last] += 1
        end
      end

      # Auto-vivifying hash: looking up an unseen bigram yields an empty
      # hash rather than nil.
      @corpus.occurrences = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }

      counts.each do |bigram, last_letters|
        total = last_letters.values.sum.to_f
        last_letters.each do |last_letter, count|
          @corpus.occurrences[bigram][last_letter] = count / total
        end
      end
    end
  end
end
|
data/lib/momblish.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require "momblish/version"
|
2
|
+
require "momblish/corpus_analyzer"
|
3
|
+
require "momblish/corpus"
|
4
|
+
|
5
|
+
# Generates pronounceable nonsense words from the letter statistics of a
# corpus (see Momblish::Corpus).
class Momblish
  # Base class for errors raised by this library.
  class Error < StandardError; end

  # Adds weighted random selection to hashes shaped { item => weight }.
  module WeightedSample
    refine Hash do
      # Picks one key at random, favouring keys with larger weights: every
      # key is scored with rand ** (1.0 / weight) and the highest score wins.
      #
      # Returns the chosen key, or nil when the hash is empty.
      def weighted_sample
        max_by { |_, weight| rand**(1.0 / weight) }&.first
      end
    end
  end

  using WeightedSample

  # Candidate word-list locations per language; the first existing file wins.
  DICT = {
    'english' => ['/usr/share/dict/words', '/usr/dict/words', '/usr/share/dict/web2'],
    'simple' => ["#{__dir__}/corpuses/simple.txt"],
    'names' => ['/usr/share/dict/propernames', '/usr/dict/propernames'],
    'spanish' => ["#{__dir__}/corpuses/spanish.txt"]
  }

  # Raised when a generator is built from a corpus without any statistics.
  class EmptyCorpusError < Error
    def initialize(message = 'Your corpus has no words')
      # Hand the message to StandardError so #message, #to_s and #inspect
      # all report it.
      super
    end
  end

  class << self
    # Returns the first existing dictionary path registered for +lang+
    # (a String key of DICT), or nil when the language is unknown or none
    # of its files exist.
    def lookup_dict(lang)
      DICT.fetch(lang, []).find { |location| File.exist?(location) }
    end

    # Provides one factory per DICT key, e.g. Momblish.english, which
    # analyses that language's dictionary and returns a ready generator.
    #
    # Raises Momblish::Error when no dictionary file for the language
    # exists. Any other unknown method falls through to the default
    # NoMethodError, whatever arguments it was called with.
    def method_missing(lang, *args, &block)
      name = lang.to_s
      return super unless DICT.key?(name)

      dict_file = lookup_dict(name)
      if dict_file.nil?
        raise Error, "No dictionary file found for '#{name}' (looked in: #{DICT[name].join(', ')})"
      end

      new(Momblish::CorpusAnalyzer.new(File.readlines(dict_file)).corpus)
    end

    # Keeps respond_to? / method(...) consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      DICT.key?(name.to_s) || super
    end
  end

  attr_accessor :corpus

  # corpus - object exposing #weighted_bigrams and #occurrences hashes.
  #
  # Raises EmptyCorpusError when either table is empty (which is always the
  # case when no corpus is given).
  def initialize(corpus = nil)
    @corpus = corpus || Corpus.new({}, {})

    return unless @corpus.weighted_bigrams.empty? || @corpus.occurrences.empty?

    raise EmptyCorpusError, 'Your corpus has no words'
  end

  # Generates one lower-case nonsense word.
  #
  # length - desired number of letters; random between 4 and 12 when nil.
  #
  # The word may come out shorter when the corpus knows no letter that can
  # follow its last two letters.
  def word(length = nil)
    length ||= rand(4..12)

    # Start from a bigram drawn according to how often it starts real words.
    word = @corpus.weighted_bigrams.weighted_sample

    (length - 2).times do
      # fetch avoids both a nil receiver for plain hashes (e.g. a corpus
      # loaded from JSON) and inserting empty entries into auto-vivifying
      # hashes.
      followers = @corpus.occurrences.fetch(word[-2..-1], nil)
      next_letter = followers ? followers.weighted_sample : nil

      break if next_letter.nil?

      word += next_letter
    end

    word.downcase
  end

  # Generates several words.
  #
  # count       - number of words; may be nil only when a block is given.
  # word_length - passed through to #word.
  #
  # With a block, yields each word (endlessly when count is nil). Without a
  # block, returns an Array of +count+ words.
  #
  # Raises ArgumentError when neither a count nor a block is supplied.
  def sentence(count = nil, word_length = nil)
    raise ArgumentError, 'You must provide a block or a count' if count.nil? && !block_given?

    return Array.new(count) { word(word_length) } unless block_given?

    if count.nil?
      loop { yield word(word_length) }
    else
      count.times { yield word(word_length) }
    end
  end
end
|
data/momblish.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
|
2
|
+
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "momblish/version"

Gem::Specification.new do |spec|
  spec.name          = "momblish"
  spec.version       = Momblish::VERSION
  spec.authors       = ["Stephen Prater"]
  spec.email         = ["me@stephenprater.com"]
  spec.license       = "MIT"

  spec.summary       = "Generate nonsense words in any language by corpus analysis"
  spec.homepage      = "https://github.com/stephenprater/momblish-rb"

  # The library uses the safe-navigation operator (Ruby 2.3) and Array#sum
  # (Ruby 2.4); declare that so older interpreters refuse to install the gem
  # instead of failing when it is loaded.
  spec.required_ruby_version = ">= 2.4.0"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Tooling needed only to develop and test the gem itself.
  spec.add_development_dependency "bundler", "~> 2.4"
  spec.add_development_dependency "rake", "~> 13.1"
  spec.add_development_dependency "minitest", "~> 5.20"
  spec.add_development_dependency "debug", "~> 1.9"
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: momblish
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Stephen Prater
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-12-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.4'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.4'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '13.1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '13.1'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.20'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.20'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: debug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.9'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.9'
|
69
|
+
description:
|
70
|
+
email:
|
71
|
+
- me@stephenprater.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".travis.yml"
|
78
|
+
- Gemfile
|
79
|
+
- Gemfile.lock
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- bin/console
|
83
|
+
- bin/setup
|
84
|
+
- lib/corpuses/simple.txt
|
85
|
+
- lib/corpuses/spanish.txt
|
86
|
+
- lib/freq.txt
|
87
|
+
- lib/momblish.rb
|
88
|
+
- lib/momblish/corpus.rb
|
89
|
+
- lib/momblish/corpus_analyzer.rb
|
90
|
+
- lib/momblish/version.rb
|
91
|
+
- momblish.gemspec
|
92
|
+
homepage: https://github.com/stephenprater/momblish-rb
|
93
|
+
licenses:
|
94
|
+
- MIT
|
95
|
+
metadata: {}
|
96
|
+
post_install_message:
|
97
|
+
rdoc_options: []
|
98
|
+
require_paths:
|
99
|
+
- lib
|
100
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
requirements: []
|
111
|
+
rubygems_version: 3.4.10
|
112
|
+
signing_key:
|
113
|
+
specification_version: 4
|
114
|
+
summary: Generate nonsense words in any language by corpus analysis
|
115
|
+
test_files: []
|