greeklish 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1ab370892f9c7e570ab8f9407d58534cdb3be66a
4
+ data.tar.gz: 36cfd3be6b235801e733adbccf7935917a12d929
5
+ SHA512:
6
+ metadata.gz: c5298e282e8a5b831086eda8a6156224a8c92a0ebb456a7b2980f7998ffac4b1faf53dc4736d5eb0f20e687bcd8b0b778d99f299ed4a6e5388940e55c332f74e
7
+ data.tar.gz: 83762172ec19a073afd9545dd097d81d5dece088ad0c5df28482fdf9c6be96237c9359dbf0808415a8f60178650433082f88246d609c86b2af5b8c88320f4c7c
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ *.gem
15
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in greeklish.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Petros Markou
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,66 @@
1
+ # Greeklish
2
+
3
+ Generate greeklish forms from Greek words.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'greeklish'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install greeklish
20
+
21
+ ## Usage
22
+
23
+ Obtain an instance of `GreeklishConverter` as follows:
24
+
25
+ ```ruby
26
+ converter = Greeklish.converter(max_expansions: 2,
27
+ generate_greek_variants: false)
28
+
29
+ greeklish_words = converter.convert('ομπρελα') # => ["omprela", "obrela"]
30
+
31
+ ```
32
+
33
+ The option `max_expansions` denotes the maximum greeklish expansions for
34
+ each greek word, e.g.:
35
+
36
+ ```ruby
37
+ converter = Greeklish.converter(max_expansions: 4,
38
+ generate_greek_variants: false)
39
+
40
+ converter.convert('αυτοκινητο') # =>
41
+ ["autokinhto", "aftokinhto", "avtokinhto", "aytokinhto"]
42
+ ```
43
+
44
+ The option `generate_greek_variants` denotes if greek variants should
45
+ be generated, e.g.:
46
+
47
+ ```ruby
48
+ converter = Greeklish.converter(max_expansions: 2,
49
+ generate_greek_variants: true)
50
+
51
+ converter.convert('αμαξι') # =>
52
+ ["amaksi", "amaxi", "amaksiou", "amaxiou", "amaksia", "amaxia",
53
+ "amaksiwn", "amaxiwn"]
54
+ ```
55
+
56
+ ## Credits
57
+
58
+ Based on: [elasticsearch-analysis-greeklish](https://github.com/skroutz/elasticsearch-analysis-greeklish)
59
+
60
+ ## Contributing
61
+
62
+ 1. Fork it ( https://github.com/skroutz/greeklish/fork )
63
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
64
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
65
+ 4. Push to the branch (`git push origin my-new-feature`)
66
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/greeklish.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'greeklish/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "greeklish"
8
+ spec.version = Greeklish::VERSION
9
+ spec.authors = ["Petros Markou"]
10
+ spec.email = ["markoupetr@skroutz.gr"]
11
+ spec.summary = %q{Generates greeklish forms}
12
+ spec.description = %q{Configurable generator of Greek words to greeklish forms.}
13
+ spec.homepage = "https://github.com/skroutz/greeklish"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rspec", "~> 3.1.0"
24
+ end
@@ -0,0 +1,112 @@
1
+ # coding: utf-8
2
module Greeklish
  # Generates singular/plural variants of a Greek word by swapping its
  # ending according to a fixed table of suffix rules.
  #
  # The rule table is written in lowercase, without accents and with "σ"
  # (never "ς") in final position, so only words spelled that way match.
  class GreekReverseStemmer

    # Word endings that trigger variant generation.
    SUFFIX_MATOS = "ματοσ".freeze
    SUFFIX_MATA = "ματα".freeze
    SUFFIX_MATWN = "ματων".freeze
    SUFFIX_AS = "ασ".freeze
    SUFFIX_EIA = "εια".freeze
    SUFFIX_EIO = "ειο".freeze
    SUFFIX_EIOY = "ειου".freeze
    SUFFIX_EIWN = "ειων".freeze
    SUFFIX_IOY = "ιου".freeze
    SUFFIX_IA = "ια".freeze
    SUFFIX_IWN = "ιων".freeze
    SUFFIX_OS = "οσ".freeze
    SUFFIX_OI = "οι".freeze
    SUFFIX_EIS = "εισ".freeze
    SUFFIX_ES = "εσ".freeze
    SUFFIX_HS = "ησ".freeze
    SUFFIX_WN = "ων".freeze
    SUFFIX_OY = "ου".freeze
    SUFFIX_O = "ο".freeze
    SUFFIX_H = "η".freeze
    SUFFIX_A = "α".freeze
    SUFFIX_I = "ι".freeze

    # Rule table. Each row is [matched suffix, replacement, replacement, ...].
    # Rows are tried top to bottom and only the first match is applied, so
    # longer suffixes must stay above the shorter ones they end with.
    SUFFIX_STRINGS = [
      [SUFFIX_MATOS, "μα", "ματων", "ματα"],
      [SUFFIX_MATA, "μα", "ματων", "ματοσ"],
      [SUFFIX_MATWN, "μα", "ματα", "ματοσ"],
      [SUFFIX_AS, "α", "ων", "εσ"],
      [SUFFIX_EIA, "ειο", "ειων", "ειου", "ειασ"],
      [SUFFIX_EIO, "εια", "ειων", "ειου"],
      [SUFFIX_EIOY, "εια", "ειου", "ειο", "ειων"],
      [SUFFIX_EIWN, "εια", "ειου", "ειο", "ειασ"],
      [SUFFIX_IOY, "ι", "ια", "ιων", "ιο"],
      [SUFFIX_IA, "ιου", "ι", "ιων", "ιασ", "ιο"],
      [SUFFIX_IWN, "ιου", "ια", "ι", "ιο"],
      [SUFFIX_OS, "η", "ουσ", "ου", "οι", "ων"],
      [SUFFIX_OI, "οσ", "ου", "ων"],
      [SUFFIX_EIS, "η", "ησ", "εων"],
      [SUFFIX_ES, "η", "ασ", "ων", "ησ", "α"],
      [SUFFIX_HS, "ων", "εσ", "η", "εων"],
      [SUFFIX_WN, "οσ", "εσ", "α", "η", "ησ", "ου", "οι", "ο", "α"],
      [SUFFIX_OY, "ων", "α", "ο", "οσ"],
      [SUFFIX_O, "α", "ου", "εων", "ων"],
      [SUFFIX_H, "οσ", "ουσ", "εων", "εισ", "ησ", "ων"],
      [SUFFIX_A, "ο", "ου", "ων", "ασ", "εσ"],
      [SUFFIX_I, "ιου", "ια", "ιων"]
    ].freeze

    # Hash mapping each handled suffix to the list of suffixes that may
    # replace it.
    attr_reader :suffixes

    # The variants produced by the most recent call to
    # #generate_greek_variants (the original word comes first).
    attr_reader :greek_words

    def initialize
      @greek_words = []

      # Index the rule table by its first column for direct lookup.
      @suffixes = SUFFIX_STRINGS.each_with_object({}) do |(key, *replacements), table|
        table[key] = replacements
      end
    end

    # Generates the Greek variants of the given word.
    #
    # The result always starts with the word itself. When the word ends
    # with one of the known suffixes, one variant per replacement suffix of
    # the first matching rule follows. Duplicates are removed.
    #
    # @param token_string the greek word
    # @return a new list of the generated greek word variations; it is not
    #   affected by later calls
    def generate_greek_variants(token_string)
      # Start from a fresh list so results of previous calls stay intact.
      @greek_words = [token_string]

      matched_rule = SUFFIX_STRINGS.find { |rule| token_string.end_with?(rule.first) }
      generate_more_greek_words(token_string, matched_rule.first) if matched_rule

      # Some rules list the matched suffix (or the same replacement twice),
      # which would otherwise yield repeated words.
      @greek_words.uniq!
      @greek_words.dup
    end

    # Appends to #greek_words one word per replacement registered for
    # +input_suffix+, built by swapping that suffix at the end of
    # +input_token+.
    #
    # @param input_token the word whose ending is replaced
    # @param input_suffix the suffix that matched.
    def generate_more_greek_words(input_token, input_suffix)
      # Escape the suffix and anchor at the very end of the string (\z), so
      # nothing but the trailing suffix can ever be replaced.
      trailing_suffix = /#{Regexp.escape(input_suffix)}\z/

      suffixes[input_suffix].each do |suffix|
        @greek_words << input_token.sub(trailing_suffix) { suffix }
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # coding: utf-8
2
module Greeklish
  # Converts a Greek token into its greeklish (Latin-alphabet) spellings.
  #
  # Optionally the singular/plural variants of the token are generated
  # first, and every variant is then converted. A Greek character may have
  # one or more Latin counterparts, so one Greek token can produce several
  # Latin tokens.
  class GreeklishConverter

    # Only tokens made up exclusively of these characters are converted.
    # Note that the list holds neither accented letters, capitals nor the
    # final sigma "ς".
    GREEK_CHARACTERS = "αβγδεζηθικλμνξοπρστυφχψω"

    # The greek words that were converted by the last call to #convert.
    attr_reader :greek_words

    # Kept for interface compatibility; nothing in this class assigns it.
    attr_reader :token_string

    # Instance of the reverse stemmer that generates the word variants
    # of the greek token.
    attr_reader :reverse_stemmer

    # Instance of the greeklish generator that turns the greek words into
    # greeklish words.
    attr_reader :greeklish_generator

    # Whether greek variants are generated before the conversion.
    attr_reader :generate_greek_variants

    # @param max_expansions the expansion limit handed to the greeklish
    #   generator
    # @param generate_greek_variants truthy to also convert the variants
    #   produced by the reverse stemmer
    def initialize(max_expansions, generate_greek_variants)
      @greek_words = []
      @reverse_stemmer = GreekReverseStemmer.new
      @greeklish_generator = GreeklishGenerator.new(max_expansions)
      @generate_greek_variants = generate_greek_variants
    end

    # Converts a Greek token to greeklish.
    #
    # @param input_token the Greek token
    # @return a list of the generated strings, or nil when the token
    #   contains a character outside GREEK_CHARACTERS
    def convert(input_token)
      return nil unless identify_greek_word(input_token)

      # Rebuild the word list on every call; appending to the previous list
      # would make earlier tokens leak into this call's result.
      @greek_words =
        if generate_greek_variants
          reverse_stemmer.generate_greek_variants(input_token)
        else
          [input_token]
        end

      return nil if greek_words.empty?

      greeklish_generator.generate_greeklish_words(greek_words)
    end

    # Identifies words with only Greek lowercase characters.
    #
    # @param input the string to examine
    # @return true if every character of the string is in GREEK_CHARACTERS
    #   (which also holds for the empty string)
    def identify_greek_word(input)
      input.each_char.all? { |char| GREEK_CHARACTERS.include?(char) }
    end
  end
end
@@ -0,0 +1,146 @@
1
+ # coding: utf-8
2
module Greeklish
  # Generates the greeklish (Latin-alphabet) spellings of Greek words.
  #
  # Every digraph of a word is first collapsed into a single placeholder
  # character (a capital Greek letter). The word is then walked character
  # by character and each character is expanded into all of its Latin
  # alternatives, keeping at most +max_expansions+ tokens per word.
  #
  # Because capital Greek letters serve as placeholders, the input is
  # expected to be lowercase.
  class GreeklishGenerator

    # Placeholder characters that stand in for a digraph.
    AI = "Α".freeze
    EI = "Ε".freeze
    OI = "Ο".freeze
    OY = "Υ".freeze
    EY = "Φ".freeze
    AY = "Β".freeze
    MP = "Μ".freeze
    GG = "Γ".freeze
    GK = "Κ".freeze
    NT = "Ν".freeze

    # Hash mapping each digraph to its placeholder character.
    attr_accessor :digraphs

    # Hash mapping each Greek character (or digraph placeholder) to the
    # Latin strings that can replace it. Unknown keys yield an empty list.
    attr_accessor :conversions

    # The possible digraph cases.
    DIGRAPH_CASES = [
      ["αι", AI], ["ει", EI], ["οι", OI], ["ου", OY],
      ["ευ", EY], ["αυ", AY], ["μπ", MP], ["γγ", GG],
      ["γκ", GK], ["ντ", NT]
    ].freeze

    # The possible string conversions for each case.
    CONVERT_STRINGS = [
      [AI, "ai", "e"], [EI, "ei", "i"], [OI, "oi", "i"],
      [OY, "ou", "oy", "u"], [EY, "eu", "ef", "ev", "ey"],
      [AY, "au", "af", "av", "ay"], [MP, "mp", "b"],
      [GG, "gg", "g"], [GK, "gk", "g"], [NT, "nt", "d"],
      ["α", "a"], ["β", "b", "v"], ["γ", "g"], ["δ", "d"],
      ["ε", "e"], ["ζ", "z"], ["η", "h", "i"], ["θ", "th"],
      ["ι", "i"], ["κ", "k"], ["λ", "l"], ["μ", "m"],
      ["ν", "n"], ["ξ", "ks", "x"], ["ο", "o"], ["π", "p"],
      ["ρ", "r"], ["σ", "s"], ["τ", "t"], ["υ", "y", "u", "i"],
      ["φ", "f", "ph"], ["χ", "x", "h", "ch"], ["ψ", "ps"],
      ["ω", "w", "o", "v"]
    ].freeze

    # The maximum greeklish expansions per greek token.
    attr_reader :max_expansions

    # The greeklish tokens of the word currently (or last) processed.
    attr_reader :per_word_greeklish

    # The tokens generated by the last call, grouped per greek word.
    attr_reader :greeklish_list

    # @param max_expansions the maximum number of greeklish tokens kept
    #   for each greek word
    def initialize(max_expansions)
      @max_expansions = max_expansions
      @greeklish_list = []
      @per_word_greeklish = []
      @digraphs = Hash[DIGRAPH_CASES]

      # The default is frozen because a Hash default object is shared by
      # every missing key and must never be mutated.
      @conversions = Hash.new([].freeze)
      CONVERT_STRINGS.each do |key, *replacements|
        @conversions[key] = replacements
      end
    end

    # Gets a list of greek words and generates the greeklish version of
    # each word.
    #
    # Characters without a conversion entry (digits, punctuation, ...) are
    # copied to the output unchanged.
    #
    # @param greek_words a list of greek words
    # @return a flat list of greeklish words, in input word order
    def generate_greeklish_words(greek_words)
      @greeklish_list.clear

      greek_words.each do |greek_word|
        @per_word_greeklish.clear

        # Collapse the digraphs one after the other, in table order.
        collapsed = digraphs.reduce(greek_word) do |word, (digraph, placeholder)|
          word.gsub(digraph, placeholder)
        end

        collapsed.each_char do |greek_char|
          # An empty alternative list would discard every token built so
          # far, so unknown characters fall back to themselves.
          alternatives = conversions.key?(greek_char) ? conversions[greek_char] : [greek_char]
          add_character(alternatives)
        end

        @greeklish_list << per_word_greeklish.flatten
      end

      @greeklish_list.flatten
    end

    private

    # Add the matching latin characters to the generated greeklish tokens
    # for a specific Greek character. For each different combination of
    # latin characters, a new token is generated, up to +max_expansions+
    # tokens in total.
    #
    # @param convert_strings the latin characters that will be added to the tokens
    def add_character(convert_strings)
      if per_word_greeklish.empty?
        # First character of the word: every alternative starts a token.
        convert_strings.each do |convert_string|
          break if per_word_greeklish.size >= max_expansions

          @per_word_greeklish << convert_string
        end
      else
        new_tokens = []

        # Alternatives are the outer loop so that every existing token is
        # extended with the first alternative before the second is tried.
        convert_strings.each do |convert_string|
          per_word_greeklish.each do |token|
            break if new_tokens.size >= max_expansions

            new_tokens << "#{token}#{convert_string}"
          end
        end

        @per_word_greeklish = new_tokens
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Greeklish
  # The gem version. Frozen so the shared constant cannot be mutated.
  VERSION = "0.0.1".freeze
end
data/lib/greeklish.rb ADDED
@@ -0,0 +1,11 @@
1
+ require "greeklish/version"
2
+ require "greeklish/greeklish_generator"
3
+ require "greeklish/greek_reverse_stemmer"
4
+ require "greeklish/greeklish_converter"
5
+
6
module Greeklish
  # Expansion limit used when the caller does not supply :max_expansions.
  # NOTE(review): 20 is believed to match the default of the upstream
  # elasticsearch-analysis-greeklish plugin -- confirm before release.
  DEFAULT_MAX_EXPANSIONS = 20

  # Builds a GreeklishConverter.
  #
  # @param options [Hash]
  #   :max_expansions          maximum greeklish tokens per greek word;
  #                            DEFAULT_MAX_EXPANSIONS when missing or nil
  #                            (a nil limit cannot be compared against the
  #                            token count during conversion)
  #   :generate_greek_variants whether singular/plural variants are
  #                            converted as well; off when missing
  # @return [GreeklishConverter]
  def self.converter(options = {})
    max_expansions = options[:max_expansions] || DEFAULT_MAX_EXPANSIONS

    GreeklishConverter.new(max_expansions,
                           options.fetch(:generate_greek_variants, false))
  end
end
@@ -0,0 +1,98 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'GreeklishConverter' do
  max_expansions = 10
  generate_greek_variants = true

  # a sample of greek words to generate their greeklish
  # counterparts.
  greek_words = ["αυτοκινητο", "ομπρελα", "ξεσκεπαστοσ"]

  # the greeklish counterparts that should be generated from the greek words.
  generated_greeklish_words = [
    ["autokinhto", "aftokinhto", "avtokinhto", "aytokinhto",
     "autokinito", "aftokinito", "avtokinito", "aytokinito",
     "autokinhtwn", "aftokinhta", "avtokinhta", "aytokinhtwn"],
    ["omprela", "obrela", "ompreles", "obrelwn", "obreles", "omprelas"],
    ["kseskepastos", "xeskepastos", "kseskepastou", "xeskepastwn", "kseskepastoi"]
  ]

  # these words should not be processed by the converter.
  invalid_words = ["mobile", "αυριο64", "καλάθι", "ΣΠιτι", "ομορφος"]

  # NOTE: the examples below must never assign to the locals defined above.
  # Blocks share those variables, so an assignment inside one example would
  # change what every example that runs afterwards sees.

  it "does not convert invalid words" do
    converter = Greeklish::GreeklishConverter.new(max_expansions, generate_greek_variants)

    invalid_words.each do |invalid_word|
      expect(converter.convert(invalid_word)).to be_nil
    end
  end

  it "converts valid words" do
    converter = Greeklish::GreeklishConverter.new(max_expansions, generate_greek_variants)

    greek_words.each_with_index do |word, i|
      greeklish_words = converter.convert(word)

      expect(greeklish_words).not_to be_empty
      expect(greeklish_words).to include(*generated_greeklish_words[i])
    end
  end

  it "respects max expansions" do
    limit = 2
    converter = Greeklish::GreeklishConverter.new(limit, false)

    greeklish_words = converter.convert(greek_words[0])

    expect(greeklish_words.size).to eq(limit)
    expect(greeklish_words).to include(*generated_greeklish_words[0].take(limit))

    # Everything beyond the limit must have been cut off.
    generated_greeklish_words[0].drop(limit).each do |cut_off_word|
      expect(greeklish_words).not_to include(cut_off_word)
    end
  end

  it "respects variant generation" do
    converter = Greeklish::GreeklishConverter.new(1, false)

    greeklish_words = converter.convert(greek_words[0])

    expect(greeklish_words).to include(generated_greeklish_words[0][0])
    # "aftokinhta" only exists as the spelling of a generated variant.
    expect(greeklish_words).not_to include(generated_greeklish_words[0][9])
  end
end
@@ -0,0 +1,64 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'GreeklishGenerator' do
  max_expansions = 10

  # a sample of greek words to generate their greeklish
  # counterparts.
  greek_words = ["αυτοκινητο", "ομπρελα", "ξεσκεπαστοσ"]

  # the greeklish counterparts that should be generated
  # from the greek words.
  generated_greeklish_words = [
    "autokinhto", "aftokinhto", "avtokinhto", "aytokinhto",
    "autokinito", "aftokinito", "avtokinito", "aytokinito",
    "omprela", "obrela", "kseskepastos", "xeskepastos"
  ]

  it "converts valid words" do
    generator = Greeklish::GreeklishGenerator.new(max_expansions)

    greeklish_words = generator.generate_greeklish_words(greek_words)

    expect(greeklish_words).not_to be_empty
    expect(greeklish_words).to include(*generated_greeklish_words)
  end

  it "respects the max expansion setting" do
    new_max_expansions = 2
    generator = Greeklish::GreeklishGenerator.new(new_max_expansions)

    # Every sample word has at least two spellings, so each one must be
    # cut down to exactly the limit. (Feeding an empty list here would
    # only ever compare 0 with 0.)
    greeklish_words = generator.generate_greeklish_words(greek_words)

    expect(greeklish_words.size).to eq(new_max_expansions * greek_words.size)
  end
end
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'GreeklishReverseStemmer' do
  # Greek words for which variants must be produced.
  greek_words = ["κουρεματοσ", "ενδυματα", "γραφειου", "πεδιου",
                 "γραναζι", "ποδηλατα", "καλωδιων"]

  # Words whose ending matches none of the suffix rules.
  non_matching_words = ["σουτιεν", "κολλαν", "αμπαλαζ", "μακιγιαζ"]

  # Expected variants, listed in the same order as greek_words.
  greek_variants = [
    ["κουρεμα", "κουρεματων", "κουρεματα"],
    ["ενδυμα", "ενδυματων", "ενδυματα", "ενδυματοσ"],
    ["γραφειο", "γραφεια", "γραφειων"],
    ["πεδια", "πεδιο", "πεδιων"],
    ["γραναζια", "γραναζιου", "γραναζιων"],
    ["ποδηλατο", "ποδηλατου", "ποδηλατα", "ποδηλατων"],
    ["καλωδιου", "καλωδια", "καλωδιο"]
  ]

  before(:all) do
    @reverse_stemmer = Greeklish::GreekReverseStemmer.new
  end

  it "produces greek variants" do
    greek_words.zip(greek_variants).each do |word, expected_variants|
      produced = @reverse_stemmer.generate_greek_variants(word)

      expect(produced.size > 1).to eq(true)

      expected_variants.each do |expected_variant|
        expect(produced.include?(expected_variant)).to eq(true)
      end
    end
  end

  it "does not produce variants for non matching words" do
    non_matching_words.each do |word|
      produced = @reverse_stemmer.generate_greek_variants(word)

      # Only the word itself is returned.
      expect(produced.size).to eq(1)
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
describe 'Greeklish' do
  it "correctly converts to greeklish" do
    # Build the converter through the module-level factory.
    options = { max_expansions: 2, generate_greek_variants: false }
    greeklish = Greeklish.converter(options).convert("ομπρελα")

    expect(greeklish.length).to eq(2)
    expect(greeklish).to include("omprela", "obrela")
  end
end
@@ -0,0 +1,91 @@
1
+ require 'greeklish'
2
+
3
+ # This file was generated by the `rspec --init` command. Conventionally, all
4
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
5
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
6
+ # file to always be loaded, without a need to explicitly require it in any files.
7
+ #
8
+ # Given that it is always loaded, you are encouraged to keep this file as
9
+ # light-weight as possible. Requiring heavyweight dependencies from this file
10
+ # will add to the boot time of your test suite on EVERY test run, even for an
11
+ # individual file that may not need all of that loaded. Instead, consider making
12
+ # a separate helper file that requires the additional dependencies and performs
13
+ # the additional setup, and require it from the spec files that actually need it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+ RSpec.configure do |config|
20
+ # rspec-expectations config goes here. You can use an alternate
21
+ # assertion/expectation library such as wrong or the stdlib/minitest
22
+ # assertions if you prefer.
23
+ config.expect_with :rspec do |expectations|
24
+ # This option will default to `true` in RSpec 4. It makes the `description`
25
+ # and `failure_message` of custom matchers include text for helper methods
26
+ # defined using `chain`, e.g.:
27
+ # be_bigger_than(2).and_smaller_than(4).description
28
+ # # => "be bigger than 2 and smaller than 4"
29
+ # ...rather than:
30
+ # # => "be bigger than 2"
31
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
32
+ end
33
+
34
+ # rspec-mocks config goes here. You can use an alternate test double
35
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
36
+ config.mock_with :rspec do |mocks|
37
+ # Prevents you from mocking or stubbing a method that does not exist on
38
+ # a real object. This is generally recommended, and will default to
39
+ # `true` in RSpec 4.
40
+ mocks.verify_partial_doubles = true
41
+ end
42
+
43
+ # The settings below are suggested to provide a good initial experience
44
+ # with RSpec, but feel free to customize to your heart's content.
45
+ =begin
46
+ # These two settings work together to allow you to limit a spec run
47
+ # to individual examples or groups you care about by tagging them with
48
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
49
+ # get run.
50
+ config.filter_run :focus
51
+ config.run_all_when_everything_filtered = true
52
+
53
+ # Limits the available syntax to the non-monkey patched syntax that is recommended.
54
+ # For more details, see:
55
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
56
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
57
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
58
+ config.disable_monkey_patching!
59
+
60
+ # This setting enables warnings. It's recommended, but in some cases may
61
+ # be too noisy due to issues in dependencies.
62
+ config.warnings = true
63
+
64
+ # Many RSpec users commonly either run the entire suite or an individual
65
+ # file, and it's useful to allow more verbose output when running an
66
+ # individual spec file.
67
+ if config.files_to_run.one?
68
+ # Use the documentation formatter for detailed output,
69
+ # unless a formatter has already been configured
70
+ # (e.g. via a command-line flag).
71
+ config.default_formatter = 'doc'
72
+ end
73
+
74
+ # Print the 10 slowest examples and example groups at the
75
+ # end of the spec run, to help surface which specs are running
76
+ # particularly slow.
77
+ config.profile_examples = 10
78
+
79
+ # Run specs in random order to surface order dependencies. If you find an
80
+ # order dependency and want to debug it, you can fix the order by providing
81
+ # the seed, which is printed after each run.
82
+ # --seed 1234
83
+ config.order = :random
84
+
85
+ # Seed global randomization in this process using the `--seed` CLI option.
86
+ # Setting this allows you to use `--seed` to deterministically reproduce
87
+ # test failures related to randomization by passing the same `--seed` value
88
+ # as the one that triggered the failure.
89
+ Kernel.srand config.seed
90
+ =end
91
+ end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: greeklish
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Petros Markou
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 3.1.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 3.1.0
55
+ description: Configurable generator of Greek words to greeklish forms.
56
+ email:
57
+ - markoupetr@skroutz.gr
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - greeklish.gemspec
69
+ - lib/greeklish.rb
70
+ - lib/greeklish/greek_reverse_stemmer.rb
71
+ - lib/greeklish/greeklish_converter.rb
72
+ - lib/greeklish/greeklish_generator.rb
73
+ - lib/greeklish/version.rb
74
+ - spec/greeklish_converter_spec.rb
75
+ - spec/greeklish_generator_spec.rb
76
+ - spec/greeklish_reverse_stemmer_spec.rb
77
+ - spec/greeklish_spec.rb
78
+ - spec/spec_helper.rb
79
+ homepage: https://github.com/skroutz/greeklish
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 2.4.6
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: Generates greeklish forms
103
+ test_files:
104
+ - spec/greeklish_converter_spec.rb
105
+ - spec/greeklish_generator_spec.rb
106
+ - spec/greeklish_reverse_stemmer_spec.rb
107
+ - spec/greeklish_spec.rb
108
+ - spec/spec_helper.rb
109
+ has_rdoc: