greeklish 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1ab370892f9c7e570ab8f9407d58534cdb3be66a
4
+ data.tar.gz: 36cfd3be6b235801e733adbccf7935917a12d929
5
+ SHA512:
6
+ metadata.gz: c5298e282e8a5b831086eda8a6156224a8c92a0ebb456a7b2980f7998ffac4b1faf53dc4736d5eb0f20e687bcd8b0b778d99f299ed4a6e5388940e55c332f74e
7
+ data.tar.gz: 83762172ec19a073afd9545dd097d81d5dece088ad0c5df28482fdf9c6be96237c9359dbf0808415a8f60178650433082f88246d609c86b2af5b8c88320f4c7c
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ *.gem
15
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in greeklish.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Petros Markou
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # Greeklish
2
+
3
+ Generate greeklish forms from Greek words.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'greeklish'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install greeklish
20
+
21
+ ## Usage
22
+
23
+ Obtain an instance of `GreeklishConverter` as follows:
24
+
25
+ ```ruby
26
+ converter = Greeklish.converter(max_expansions: 2,
27
+ generate_greek_variants: false)
28
+
29
+ greeklish_words = converter.convert('ομπρελα') # => ["omprela", "obrela"]
30
+
31
+ ```
32
+
33
+ The option `max_expansions` denotes the maximum greeklish expansions for
34
+ each greek word, e.g.:
35
+
36
+ ```ruby
37
+ converter = Greeklish.converter(max_expansions: 4,
38
+ generate_greek_variants: false)
39
+
40
+ converter.convert('αυτοκινητο') # =>
41
+ ["autokinhto", "aftokinhto", "avtokinhto", "aytokinhto"]
42
+ ```
43
+
44
+ The option `generate_greek_variants` denotes if greek variants should
45
+ be generated, e.g.:
46
+
47
+ ```ruby
48
+ converter = Greeklish.converter(max_expansions: 2,
49
+ generate_greek_variants: true)
50
+
51
+ converter.convert('αμαξι') # =>
52
+ ["amaksi", "amaxi", "amaksiou", "amaxiou", "amaksia", "amaxia",
53
+ "amaksiwn", "amaxiwn"]
54
+ ```
55
+
56
+ ## Credits
57
+
58
+ Based on: [elasticsearch-analysis-greeklish](https://github.com/skroutz/elasticsearch-analysis-greeklish)
59
+
60
+ ## Contributing
61
+
62
+ 1. Fork it ( https://github.com/skroutz/greeklish/fork )
63
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
64
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
65
+ 4. Push to the branch (`git push origin my-new-feature`)
66
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/greeklish.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'greeklish/version'
5
+
6
# Packaging metadata for the greeklish gem.
Gem::Specification.new do |gem|
  gem.name        = 'greeklish'
  gem.version     = Greeklish::VERSION
  gem.authors     = ['Petros Markou']
  gem.email       = ['markoupetr@skroutz.gr']
  gem.summary     = 'Generates greeklish forms'
  gem.description = 'Configurable generator of Greek words to greeklish forms.'
  gem.homepage    = 'https://github.com/skroutz/greeklish'
  gem.license     = 'MIT'

  # Ship every file tracked by git; the listing is NUL-separated.
  gem.files         = `git ls-files -z`.split("\x0")
  # Executables are the basenames of everything under bin/.
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.7'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rspec', '~> 3.1.0'
end
@@ -0,0 +1,112 @@
1
+ # coding: utf-8
2
module Greeklish
  # Generates singular/plural variants of a Greek word by swapping its
  # ending according to a fixed table of suffix rules.
  #
  # The rules are written with unaccented lowercase letters and a plain
  # "σ" in final position, so only words spelled that way match a rule.
  class GreekReverseStemmer

    # Suffixes that trigger variant generation. Each one is the first
    # element (the key) of a row in SUFFIX_STRINGS.
    SUFFIX_MATOS = "ματοσ"
    SUFFIX_MATA = "ματα"
    SUFFIX_MATWN = "ματων"
    SUFFIX_AS = "ασ"
    SUFFIX_EIA = "εια"
    SUFFIX_EIO = "ειο"
    SUFFIX_EIOY = "ειου"
    SUFFIX_EIWN = "ειων"
    SUFFIX_IOY = "ιου"
    SUFFIX_IA = "ια"
    SUFFIX_IWN = "ιων"
    SUFFIX_OS = "οσ"
    SUFFIX_OI = "οι"
    SUFFIX_EIS = "εισ"
    SUFFIX_ES = "εσ"
    SUFFIX_HS = "ησ"
    SUFFIX_WN = "ων"
    SUFFIX_OY = "ου"
    SUFFIX_O = "ο"
    SUFFIX_H = "η"
    SUFFIX_A = "α"
    SUFFIX_I = "ι"

    # Rule table. Each row is [matched_suffix, replacement, ...].
    # Rows are tried in order and the first match wins, so longer suffixes
    # must stay ahead of the shorter suffixes they end with.
    SUFFIX_STRINGS = [
      [SUFFIX_MATOS, "μα", "ματων", "ματα"],
      [SUFFIX_MATA, "μα", "ματων", "ματοσ"],
      [SUFFIX_MATWN, "μα", "ματα", "ματοσ"],
      [SUFFIX_AS, "α", "ων", "εσ"],
      [SUFFIX_EIA, "ειο", "ειων", "ειου", "ειασ"],
      [SUFFIX_EIO, "εια", "ειων", "ειου"],
      [SUFFIX_EIOY, "εια", "ειου", "ειο", "ειων"],
      [SUFFIX_EIWN, "εια", "ειου", "ειο", "ειασ"],
      [SUFFIX_IOY, "ι", "ια", "ιων", "ιο"],
      [SUFFIX_IA, "ιου", "ι", "ιων", "ιασ", "ιο"],
      [SUFFIX_IWN, "ιου", "ια", "ι", "ιο"],
      [SUFFIX_OS, "η", "ουσ", "ου", "οι", "ων"],
      [SUFFIX_OI, "οσ", "ου", "ων"],
      [SUFFIX_EIS, "η", "ησ", "εων"],
      [SUFFIX_ES, "η", "ασ", "ων", "ησ", "α"],
      [SUFFIX_HS, "ων", "εσ", "η", "εων"],
      [SUFFIX_WN, "οσ", "εσ", "α", "η", "ησ", "ου", "οι", "ο", "α"],
      [SUFFIX_OY, "ων", "α", "ο", "οσ"],
      [SUFFIX_O, "α", "ου", "εων", "ων"],
      [SUFFIX_H, "οσ", "ουσ", "εων", "εισ", "ησ", "ων"],
      [SUFFIX_A, "ο" , "ου", "ων", "ασ", "εσ"],
      [SUFFIX_I, "ιου", "ια", "ιων"]
    ].freeze

    # Hash mapping each handled suffix to the list of suffixes that may
    # replace it.
    attr_reader :suffixes

    # The variants produced by the most recent call to
    # #generate_greek_variants (the input word comes first).
    attr_reader :greek_words

    def initialize
      @greek_words = []

      # Index the rule table by its first column.
      @suffixes = SUFFIX_STRINGS.each_with_object({}) do |(key, *replacements), table|
        table[key] = replacements
      end
    end

    # Generates the Greek variants of the given word.
    #
    # Only the first rule whose suffix the word ends with is applied. A word
    # that matches no rule yields a one-element list holding the word itself.
    #
    # @param token_string the greek word
    # @return a list with the word followed by its distinct variants
    def generate_greek_variants(token_string)
      # Build a fresh list instead of clearing the old one, so that a list
      # returned by an earlier call is not modified behind the caller's back.
      @greek_words = [token_string]

      rule = SUFFIX_STRINGS.find { |candidate| token_string.end_with?(candidate.first) }
      generate_more_greek_words(token_string, rule.first) if rule

      greek_words
    end

    # Appends to #greek_words one variant of +input_token+ for every
    # replacement registered for +input_suffix+.
    #
    # Variants already present are skipped: some rules list the same
    # replacement twice or list the matched suffix itself.
    #
    # @param input_token the word whose ending is swapped
    # @param input_suffix the suffix that matched.
    def generate_more_greek_words(input_token, input_suffix)
      # Build the pattern once. \z anchors at the very end of the string
      # (unlike $, which also matches before a newline) and the suffix is
      # escaped so it is always treated literally.
      ending = /#{Regexp.escape(input_suffix)}\z/

      suffixes[input_suffix].each do |suffix|
        # Block form keeps the replacement literal (no backreferences).
        variant = input_token.sub(ending) { suffix }
        @greek_words << variant unless @greek_words.include?(variant)
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # coding: utf-8
2
module Greeklish
  # Converts a Greek token into its greeklish (latin-character) forms.
  #
  # Optionally the token is first expanded into singular/plural variants by
  # the reverse stemmer; every resulting Greek word is then handed to the
  # greeklish generator. A Greek character may have one or more latin
  # counterparts, so one Greek token can yield several latin tokens.
  class GreeklishConverter

    # Only tokens made up entirely of these characters are converted.
    GREEK_CHARACTERS = "αβγδεζηθικλμνξοπρστυφχψω"

    # The Greek words produced for the most recent token.
    attr_reader :greek_words

    # Kept for interface compatibility; this class never assigns it.
    attr_reader :token_string

    # Instance of the reverse stemmer that generates the word variants
    # of the greek token.
    attr_reader :reverse_stemmer

    # Instance of the greeklish generator that turns the Greek words into
    # greeklish words.
    attr_reader :greeklish_generator

    # Whether Greek variants are generated before conversion.
    attr_reader :generate_greek_variants

    # @param max_expansions passed to the greeklish generator
    # @param generate_greek_variants truthy to expand the token with the
    #   reverse stemmer before conversion
    def initialize(max_expansions, generate_greek_variants)
      @greek_words = []
      @reverse_stemmer = GreekReverseStemmer.new
      @greeklish_generator = GreeklishGenerator.new(max_expansions)
      @generate_greek_variants = generate_greek_variants
    end

    # The actual conversion is happening here.
    #
    # @param input_token the Greek token
    # @return a list of the generated strings, or nil when the token
    #   contains a character outside GREEK_CHARACTERS
    def convert(input_token)
      return nil unless identify_greek_word(input_token)

      # Always start from a fresh word list. Appending to the previous list
      # made every call also return the conversions of earlier tokens.
      @greek_words =
        if generate_greek_variants
          reverse_stemmer.generate_greek_variants(input_token)
        else
          [input_token]
        end

      return nil if greek_words.empty?

      greeklish_generator.generate_greeklish_words(greek_words)
    end

    # Identifies words with only Greek lowercase characters.
    #
    # @param input The string to examine
    # @return true if every character is in GREEK_CHARACTERS (an empty
    #   string therefore also yields true)
    def identify_greek_word(input)
      input.each_char.all? { |char| GREEK_CHARACTERS.include?(char) }
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # coding: utf-8
2
module Greeklish
  # Generates the greeklish (latin-character) spellings of Greek words.
  #
  # Digraphs are first collapsed into single placeholder characters; every
  # character is then expanded into its possible latin spellings, keeping at
  # most +max_expansions+ candidate spellings per word at every step.
  class GreeklishGenerator

    # Capital Greek letters used as placeholders for digraphs.
    AI = "Α"
    EI = "Ε"
    OI = "Ο"
    OY = "Υ"
    EY = "Φ"
    AY = "Β"
    MP = "Μ"
    GG = "Γ"
    GK = "Κ"
    NT = "Ν"

    # Hash mapping each digraph to its placeholder character.
    attr_accessor :digraphs

    # Hash mapping a (placeholder or Greek) character to the latin strings
    # that can replace it. Unknown keys yield an empty list.
    attr_accessor :conversions

    # The possible digraph cases.
    DIGRAPH_CASES = [
      ["αι", AI], ["ει", EI], ["οι", OI], ["ου", OY],
      ["ευ", EY], ["αυ", AY], ["μπ", MP], ["γγ", GG],
      ["γκ", GK], ["ντ", NT]
    ]

    # The possible string conversions for each case, most preferred first.
    CONVERT_STRINGS = [
      [AI, "ai", "e"], [EI, "ei", "i"], [OI, "oi", "i"],
      [OY, "ou", "oy", "u"], [EY, "eu", "ef", "ev", "ey"],
      [AY, "au", "af", "av", "ay"], [MP, "mp", "b"],
      [GG, "gg", "g"], [GK, "gk", "g"], [NT, "nt", "d"],
      ["α", "a"], ["β", "b", "v"], ["γ", "g"], ["δ", "d"],
      ["ε", "e"], ["ζ", "z"], ["η", "h", "i"], ["θ", "th"],
      ["ι", "i"], ["κ", "k"], ["λ", "l"], ["μ", "m"],
      ["ν", "n"], ["ξ", "ks", "x"], ["ο", "o"], ["π", "p"],
      ["ρ", "r"], ["σ", "s"], ["τ", "t"], ["υ", "y", "u", "i"],
      ["φ", "f", "ph"], ["χ", "x", "h", "ch"], ["ψ", "ps"],
      ["ω", "w", "o", "v"]
    ]

    # The maximum greeklish expansions per greek token.
    attr_reader :max_expansions

    # The greeklish tokens of the word processed most recently.
    attr_reader :per_word_greeklish

    # One list of greeklish tokens per input word of the latest call.
    attr_reader :greeklish_list

    # @param max_expansions maximum number of greeklish tokens kept per word
    def initialize(max_expansions)
      @max_expansions = max_expansions
      @greeklish_list = []
      @per_word_greeklish = []

      @digraphs = DIGRAPH_CASES.each_with_object({}) do |(digraph, placeholder), table|
        table[digraph] = placeholder
      end

      # The default value is one object shared by every missing key, so it
      # is frozen to rule out accidental in-place modification.
      @conversions = Hash.new([].freeze)
      CONVERT_STRINGS.each do |(key, *latin)|
        @conversions[key] = latin
      end
    end

    # Gets a list of greek words and generates the greeklish version of
    # each word.
    #
    # @param greek_words a list of greek words
    # @return a flat list of greeklish words, grouped in input order
    def generate_greeklish_words(greek_words)
      @greeklish_list.clear

      greek_words.each do |greek_word|
        @per_word_greeklish = []

        # Collapse digraphs one after another, in table order.
        encoded = digraphs.inject(greek_word) do |word, (digraph, placeholder)|
          word.gsub(digraph, placeholder)
        end

        encoded.each_char do |greek_char|
          # A character without a conversion entry is kept as it is;
          # expanding it to nothing would discard the prefix built so far.
          add_character(conversions.fetch(greek_char) { [greek_char] })
        end

        @greeklish_list << per_word_greeklish.dup
      end

      @greeklish_list.flatten
    end

    private

    # Extends every token generated so far with each of the given latin
    # strings, keeping at most +max_expansions+ of the combinations.
    #
    # Combinations are ordered by latin string first and existing token
    # second, so the preferred spelling of each character is kept when the
    # list is truncated.
    #
    # @param convert_strings the latin strings that will be added to the tokens
    def add_character(convert_strings)
      # A negative limit behaves like zero: nothing is kept.
      limit = [max_expansions, 0].max
      prefixes = per_word_greeklish.empty? ? [""] : per_word_greeklish

      combinations = convert_strings.flat_map do |convert_string|
        prefixes.map { |prefix| "#{prefix}#{convert_string}" }
      end

      @per_word_greeklish = combinations.first(limit)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Greeklish
  # Version of the gem; read by the gemspec.
  VERSION = '0.0.1'
end
data/lib/greeklish.rb ADDED
@@ -0,0 +1,11 @@
1
+ require "greeklish/version"
2
+ require "greeklish/greeklish_generator"
3
+ require "greeklish/greek_reverse_stemmer"
4
+ require "greeklish/greeklish_converter"
5
+
6
module Greeklish
  class << self
    # Builds a GreeklishConverter from an options hash.
    #
    # @param options hash read for the keys :max_expansions and
    #   :generate_greek_variants; a missing key is passed on as nil
    # @return a new GreeklishConverter
    def converter(options={})
      settings = options.values_at(:max_expansions, :generate_greek_variants)
      GreeklishConverter.new(*settings)
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'GreeklishConverter' do
  # Fixtures are defined with `let` rather than describe-level locals.
  # A local assigned inside an `it` block would overwrite the shared local
  # for every example that runs afterwards and make the suite order-dependent.
  let(:max_expansions) { 10 }

  # a sample of greek words to generate their greeklish
  # counterparts.
  let(:greek_words) { ["αυτοκινητο", "ομπρελα", "ξεσκεπαστοσ"] }

  # the greeklish counterparts that should be generated from the greek
  # words (one list per entry of greek_words).
  let(:generated_greeklish_words) do
    [
      ["autokinhto", "aftokinhto", "avtokinhto", "aytokinhto",
       "autokinito", "aftokinito", "avtokinito", "aytokinito",
       "autokinhtwn", "aftokinhta", "avtokinhta", "aytokinhtwn"],
      ["omprela", "obrela", "ompreles", "obrelwn", "obreles", "omprelas"],
      ["kseskepastos", "xeskepastos", "kseskepastou", "xeskepastwn", "kseskepastoi"]
    ]
  end

  # these words should not be processed by the converter.
  let(:invalid_words) { ["mobile", "αυριο64", "καλάθι", "ΣΠιτι", "ομορφος"] }

  it "does not convert invalid words" do
    converter = Greeklish::GreeklishConverter.new(max_expansions, true)

    invalid_words.each do |invalid_word|
      expect(converter.convert(invalid_word)).to be_nil
    end
  end

  it "converts valid words" do
    converter = Greeklish::GreeklishConverter.new(max_expansions, true)

    greek_words.each_with_index do |word, i|
      greeklish_words = converter.convert(word)

      expect(greeklish_words).not_to be_empty
      expect(greeklish_words).to include(*generated_greeklish_words[i])
    end
  end

  it "respects max expansions" do
    new_max_expansions = 2
    converter = Greeklish::GreeklishConverter.new(new_max_expansions, false)

    greeklish_words = converter.convert(greek_words.first)

    expect(greeklish_words.size).to eq(new_max_expansions)

    # The first `new_max_expansions` expected forms are kept...
    expect(greeklish_words).to include(*generated_greeklish_words.first.first(new_max_expansions))

    # ...and none of the remaining ones is generated.
    generated_greeklish_words.first.drop(new_max_expansions).each do |greeklish_word|
      expect(greeklish_words).not_to include(greeklish_word)
    end
  end

  it "respects variant generation" do
    converter = Greeklish::GreeklishConverter.new(1, false)

    greeklish_words = converter.convert(greek_words.first)

    expect(greeklish_words).to include(generated_greeklish_words[0][0])
    # Index 9 is a form of a plural variant, which must not be produced
    # when variant generation is off.
    expect(greeklish_words).not_to include(generated_greeklish_words[0][9])
  end
end
@@ -0,0 +1,64 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'GreeklishGenerator' do
  let(:max_expansions) { 10 }

  # a sample of greek words to generate their greeklish
  # counterparts.
  let(:greek_words) { ["αυτοκινητο", "ομπρελα", "ξεσκεπαστοσ"] }

  # the greeklish counterparts that should be generated
  # from the greek words.
  let(:generated_greeklish_words) do
    [
      "autokinhto", "aftokinhto", "avtokinhto", "aytokinhto",
      "autokinito", "aftokinito", "avtokinito", "aytokinito",
      "omprela", "obrela", "kseskepastos", "xeskepastos"
    ]
  end

  it "converts valid words" do
    generator = Greeklish::GreeklishGenerator.new(max_expansions)

    greeklish_words = generator.generate_greeklish_words(greek_words)

    expect(greeklish_words).not_to be_empty
    expect(greeklish_words).to include(*generated_greeklish_words)
  end

  it "respects the max expansion setting" do
    new_max_expansions = 2
    generator = Greeklish::GreeklishGenerator.new(new_max_expansions)

    # Feed the real sample words: with an empty input list the expectation
    # below would compare 0 with 0 and could never fail. Each sample word
    # has at least two greeklish spellings, so exactly two are kept per word.
    greeklish_words = generator.generate_greeklish_words(greek_words)

    expect(greeklish_words.size).to eq(new_max_expansions * greek_words.size)
  end
end
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'GreeklishReverseStemmer' do
  # Each greek word paired with the variants that must be among the
  # stemmer's output for it.
  expected_variants = {
    "κουρεματοσ" => ["κουρεμα", "κουρεματων", "κουρεματα"],
    "ενδυματα"   => ["ενδυμα", "ενδυματων", "ενδυματα", "ενδυματοσ"],
    "γραφειου"   => ["γραφειο", "γραφεια", "γραφειων"],
    "πεδιου"     => ["πεδια", "πεδιο", "πεδιων"],
    "γραναζι"    => ["γραναζια", "γραναζιου", "γραναζιων"],
    "ποδηλατα"   => ["ποδηλατο", "ποδηλατου", "ποδηλατα", "ποδηλατων"],
    "καλωδιων"   => ["καλωδιου", "καλωδια", "καλωδιο"]
  }

  # Words whose ending is not covered by any rule.
  unmatched_words = ["σουτιεν", "κολλαν", "αμπαλαζ", "μακιγιαζ"]

  before(:all) do
    @reverse_stemmer = Greeklish::GreekReverseStemmer.new
  end

  it "produces greek variants" do
    expected_variants.each do |word, variants|
      produced = @reverse_stemmer.generate_greek_variants(word)

      expect(produced.size).to be > 1
      variants.each { |variant| expect(produced).to include(variant) }
    end
  end

  it "does not produce variants for non matching words" do
    unmatched_words.each do |word|
      produced = @reverse_stemmer.generate_greek_variants(word)

      # Only the word itself is returned.
      expect(produced.size).to eq(1)
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'Greeklish' do
  it "correctly converts to greeklish" do
    # Build the converter through the module-level factory.
    options = { max_expansions: 2, generate_greek_variants: false }
    words = Greeklish.converter(options).convert("ομπρελα")

    expect(words.length).to eq(2)
    expect(words).to include("omprela", "obrela")
  end
end
@@ -0,0 +1,91 @@
1
+ require 'greeklish'
2
+
3
+ # This file was generated by the `rspec --init` command. Conventionally, all
4
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
5
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
6
+ # file to always be loaded, without a need to explicitly require it in any files.
7
+ #
8
+ # Given that it is always loaded, you are encouraged to keep this file as
9
+ # light-weight as possible. Requiring heavyweight dependencies from this file
10
+ # will add to the boot time of your test suite on EVERY test run, even for an
11
+ # individual file that may not need all of that loaded. Instead, consider making
12
+ # a separate helper file that requires the additional dependencies and performs
13
+ # the additional setup, and require it from the spec files that actually need it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
RSpec.configure do |rspec|
  # rspec-expectations settings. Another assertion/expectation library
  # (such as wrong or the stdlib/minitest assertions) could be used instead.
  rspec.expect_with(:rspec) do |expectations|
    # Makes the `description` and `failure_message` of custom matchers
    # include text for helper methods defined using `chain`, e.g.:
    #     be_bigger_than(2).and_smaller_than(4).description
    #     # => "be bigger than 2 and smaller than 4"
    # ...rather than:
    #     # => "be bigger than 2"
    # Per the generated template this becomes the default in RSpec 4.
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks settings. Another test double library (such as bogus or
  # mocha) could be selected by changing the `mock_with` option.
  rspec.mock_with(:rspec) do |mocks|
    # Refuses to mock or stub a method that does not exist on a real
    # object. Per the generated template this becomes the default in RSpec 4.
    mocks.verify_partial_doubles = true
  end

# Everything between =begin and =end is disabled. These are the optional
# settings suggested by `rspec --init`; the generated template names the
# receiver `config`, so rename it to `rspec` when enabling any of them here.
=begin
  # These two settings work together to allow you to limit a spec run
  # to individual examples or groups you care about by tagging them with
  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  # get run.
  config.filter_run :focus
  config.run_all_when_everything_filtered = true

  # Limits the available syntax to the non-monkey patched syntax that is recommended.
  # For more details, see:
  #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
  #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #   - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Many RSpec users commonly either run the entire suite or an individual
  # file, and it's useful to allow more verbose output when running an
  # individual spec file.
  if config.files_to_run.one?
    # Use the documentation formatter for detailed output,
    # unless a formatter has already been configured
    # (e.g. via a command-line flag).
    config.default_formatter = 'doc'
  end

  # Print the 10 slowest examples and example groups at the
  # end of the spec run, to help surface which specs are running
  # particularly slow.
  config.profile_examples = 10

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = :random

  # Seed global randomization in this process using the `--seed` CLI option.
  # Setting this allows you to use `--seed` to deterministically reproduce
  # test failures related to randomization by passing the same `--seed` value
  # as the one that triggered the failure.
  Kernel.srand config.seed
=end
end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: greeklish
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Petros Markou
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 3.1.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 3.1.0
55
+ description: Configurable generator of Greek words to greeklish forms.
56
+ email:
57
+ - markoupetr@skroutz.gr
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - greeklish.gemspec
69
+ - lib/greeklish.rb
70
+ - lib/greeklish/greek_reverse_stemmer.rb
71
+ - lib/greeklish/greeklish_converter.rb
72
+ - lib/greeklish/greeklish_generator.rb
73
+ - lib/greeklish/version.rb
74
+ - spec/greeklish_converter_spec.rb
75
+ - spec/greeklish_generator_spec.rb
76
+ - spec/greeklish_reverse_stemmer_spec.rb
77
+ - spec/greeklish_spec.rb
78
+ - spec/spec_helper.rb
79
+ homepage: https://github.com/skroutz/greeklish
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 2.4.6
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: Generates greeklish forms
103
+ test_files:
104
+ - spec/greeklish_converter_spec.rb
105
+ - spec/greeklish_generator_spec.rb
106
+ - spec/greeklish_reverse_stemmer_spec.rb
107
+ - spec/greeklish_spec.rb
108
+ - spec/spec_helper.rb
109
+ has_rdoc: