debilinguifier 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.rspec +2 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +100 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +35 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/debilinguifier.gemspec +68 -0
- data/lib/debilinguifier.rb +92 -0
- data/spec/debilinguifier_spec.rb +18 -0
- data/spec/fixtures/examples.yml +29 -0
- data/spec/spec_helper.rb +59 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4b1d6cc0a60c5fce3deb06f4aa276333dafb0856
|
4
|
+
data.tar.gz: 38b2458b5f352ec40a857c963d37408319325339
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 42933e09d17df8926f2bd567a94758f735b08574922eaf5bd2228af9962fb40ddfaf86126283af23d79019c534568a86c0c6fbc2860749b53afb9ae873078254
|
7
|
+
data.tar.gz: 2d7512d15a5770cc59dab2e354347bf2388ecc07ae5956c4798705f29ca1c656e76f1f15be4290d6691866b71bf4c8ddc0eeea3419236533f32b2f9272f37f21
|
data/.document
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
source "https://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
# Add dependencies to develop your gem here.
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
8
|
+
group :development do
|
9
|
+
gem "shoulda", ">= 0"
|
10
|
+
gem "rdoc", "~> 3.12"
|
11
|
+
gem "bundler", "~> 1.0"
|
12
|
+
gem "juwelier", "~> 2.1.0"
|
13
|
+
gem "simplecov", ">= 0"
|
14
|
+
gem "rspec", "~>3.5"
|
15
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (5.0.2)
|
5
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
6
|
+
i18n (~> 0.7)
|
7
|
+
minitest (~> 5.1)
|
8
|
+
tzinfo (~> 1.1)
|
9
|
+
addressable (2.4.0)
|
10
|
+
builder (3.2.3)
|
11
|
+
concurrent-ruby (1.0.5)
|
12
|
+
descendants_tracker (0.0.4)
|
13
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
14
|
+
diff-lcs (1.3)
|
15
|
+
docile (1.1.5)
|
16
|
+
faraday (0.9.2)
|
17
|
+
multipart-post (>= 1.2, < 3)
|
18
|
+
git (1.3.0)
|
19
|
+
github_api (0.15.0)
|
20
|
+
addressable (~> 2.4.0)
|
21
|
+
descendants_tracker (~> 0.0.4)
|
22
|
+
faraday (~> 0.8, < 0.10)
|
23
|
+
hashie (>= 3.4)
|
24
|
+
mime-types (>= 1.16, < 3.0)
|
25
|
+
oauth2 (~> 1.0)
|
26
|
+
hashie (3.5.5)
|
27
|
+
highline (1.7.8)
|
28
|
+
i18n (0.8.1)
|
29
|
+
json (1.8.6)
|
30
|
+
juwelier (2.1.3)
|
31
|
+
builder
|
32
|
+
bundler (>= 1.13)
|
33
|
+
git (>= 1.2.5)
|
34
|
+
github_api
|
35
|
+
highline (>= 1.6.15)
|
36
|
+
nokogiri (>= 1.5.10)
|
37
|
+
rake
|
38
|
+
rdoc
|
39
|
+
semver
|
40
|
+
jwt (1.5.6)
|
41
|
+
mime-types (2.99.3)
|
42
|
+
mini_portile2 (2.1.0)
|
43
|
+
minitest (5.10.1)
|
44
|
+
multi_json (1.12.1)
|
45
|
+
multi_xml (0.6.0)
|
46
|
+
multipart-post (2.0.0)
|
47
|
+
nokogiri (1.7.1)
|
48
|
+
mini_portile2 (~> 2.1.0)
|
49
|
+
oauth2 (1.3.1)
|
50
|
+
faraday (>= 0.8, < 0.12)
|
51
|
+
jwt (~> 1.0)
|
52
|
+
multi_json (~> 1.3)
|
53
|
+
multi_xml (~> 0.5)
|
54
|
+
rack (>= 1.2, < 3)
|
55
|
+
rack (2.0.1)
|
56
|
+
rake (12.0.0)
|
57
|
+
rdoc (3.12.2)
|
58
|
+
json (~> 1.4)
|
59
|
+
rspec (3.5.0)
|
60
|
+
rspec-core (~> 3.5.0)
|
61
|
+
rspec-expectations (~> 3.5.0)
|
62
|
+
rspec-mocks (~> 3.5.0)
|
63
|
+
rspec-core (3.5.4)
|
64
|
+
rspec-support (~> 3.5.0)
|
65
|
+
rspec-expectations (3.5.0)
|
66
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
67
|
+
rspec-support (~> 3.5.0)
|
68
|
+
rspec-mocks (3.5.0)
|
69
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
70
|
+
rspec-support (~> 3.5.0)
|
71
|
+
rspec-support (3.5.0)
|
72
|
+
semver (1.0.1)
|
73
|
+
shoulda (3.5.0)
|
74
|
+
shoulda-context (~> 1.0, >= 1.0.1)
|
75
|
+
shoulda-matchers (>= 1.4.1, < 3.0)
|
76
|
+
shoulda-context (1.2.2)
|
77
|
+
shoulda-matchers (2.8.0)
|
78
|
+
activesupport (>= 3.0.0)
|
79
|
+
simplecov (0.14.1)
|
80
|
+
docile (~> 1.1.0)
|
81
|
+
json (>= 1.8, < 3)
|
82
|
+
simplecov-html (~> 0.10.0)
|
83
|
+
simplecov-html (0.10.0)
|
84
|
+
thread_safe (0.3.6)
|
85
|
+
tzinfo (1.2.2)
|
86
|
+
thread_safe (~> 0.1)
|
87
|
+
|
88
|
+
PLATFORMS
|
89
|
+
ruby
|
90
|
+
|
91
|
+
DEPENDENCIES
|
92
|
+
bundler (~> 1.0)
|
93
|
+
juwelier (~> 2.1.0)
|
94
|
+
rdoc (~> 3.12)
|
95
|
+
rspec (~> 3.5)
|
96
|
+
shoulda
|
97
|
+
simplecov
|
98
|
+
|
99
|
+
BUNDLED WITH
|
100
|
+
1.14.5
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2017 apapamichalis
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
= debilinguifier
|
2
|
+
|
3
|
+
This is a module to help me sanitize an already populated db. The db contains company names and product descriptions in capital letters. Users populating the db had been careless enough to allow both [greek, latin] characters to be used in a single word or phrase, making it difficult to alphabetically sort them or search for them.
|
4
|
+
|
5
|
+
The purpose of this gem is to help me import the data into a new db (for a completely different app) in a more deterministic way.
|
6
|
+
|
7
|
+
|
8
|
+
* Ruby 2.4.0 is the minimum required version, because String case mapping (e.g. String#upcase) only supports non-ASCII Unicode characters such as Greek letters from that release on. See https://www.ruby-lang.org/en/news/2016/12/25/ruby-2-4-0-released/
|
9
|
+
|
10
|
+
* To use this gem, just add it (require 'debilinguifier') and call: DeBiLinguifier.dbl(your_string_to_be_de_bi_linguified)
|
11
|
+
|
12
|
+
* Please note that this only works with capital, detoned (i.e. unaccented) characters.
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
This is my very first gem and I used Juwelier https://github.com/flajann2/juwelier to create it.
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
== Contributing to debilinguifier
|
23
|
+
|
24
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
25
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
26
|
+
* Fork the project.
|
27
|
+
* Start a feature/bugfix branch.
|
28
|
+
* Commit and push until you are happy with your contribution.
|
29
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
30
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
31
|
+
|
32
|
+
== Copyright
|
33
|
+
|
34
|
+
Copyright (c) 2017 apapamichalis. See LICENSE.txt for
|
35
|
+
further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'juwelier'
|
15
|
+
Juwelier::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
|
17
|
+
gem.name = "debilinguifier"
|
18
|
+
gem.homepage = "http://github.com/apapamichalis/debilinguifier"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{A [greek, latin] debilinguifier}
|
21
|
+
gem.description = %Q{The purpose of this gem is to return a phrase written using two charsets due to user's mistake. The reason behind this is that we have a db we want to migrate populated with such entries and we want to somehow sanitize it. The db contains company and product names in capital letters (e.g. the user might have written "komπολοι".upcase instead of "κομπολοι".upcase", resulting in a string that in capital letters seems to be the same, but in practice is not)}
|
22
|
+
gem.email = "dimxer@hotmail.com"
|
23
|
+
gem.authors = ["apapamichalis"]
|
24
|
+
|
25
|
+
# dependencies defined in Gemfile
|
26
|
+
end
|
27
|
+
Juwelier::RubygemsDotOrgTasks.new
|
28
|
+
|
29
|
+
require 'rake/testtask'
|
30
|
+
Rake::TestTask.new(:test) do |test|
|
31
|
+
test.libs << 'lib' << 'test'
|
32
|
+
test.pattern = 'test/**/test_*.rb'
|
33
|
+
test.verbose = true
|
34
|
+
end
|
35
|
+
|
36
|
+
desc "Code coverage detail"
|
37
|
+
task :simplecov do
|
38
|
+
ENV['COVERAGE'] = "true"
|
39
|
+
Rake::Task['test'].execute
|
40
|
+
end
|
41
|
+
|
42
|
+
task :default => :test
|
43
|
+
|
44
|
+
require 'rdoc/task'
|
45
|
+
Rake::RDocTask.new do |rdoc|
|
46
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
47
|
+
|
48
|
+
rdoc.rdoc_dir = 'rdoc'
|
49
|
+
rdoc.title = "debilinguifier #{version}"
|
50
|
+
rdoc.rdoc_files.include('README*')
|
51
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
52
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Generated by juwelier
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: debilinguifier 0.1.0 ruby lib
|
6
|
+
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.name = "debilinguifier".freeze
|
9
|
+
s.version = "0.1.0"
|
10
|
+
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib".freeze]
|
13
|
+
s.authors = ["apapamichalis".freeze]
|
14
|
+
s.date = "2017-03-26"
|
15
|
+
s.description = "The purpose of this gem is to return a phrase written using two charsets due to user's mistake. The reason behind this is that we have a db we want to migrate populated with such entries and we want to somehow sanitize it. The db contains company and product names in capital letters (e.g. the user might have written \"kom\u03C0\u03BF\u03BB\u03BF\u03B9\".upcase instead of \"\u03BA\u03BF\u03BC\u03C0\u03BF\u03BB\u03BF\u03B9\".upcase\", resulting in a string that in capital letters seems to be the same, but in practice is not)".freeze
|
16
|
+
s.email = "dimxer@hotmail.com".freeze
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE.txt",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
".rspec",
|
24
|
+
"Gemfile",
|
25
|
+
"Gemfile.lock",
|
26
|
+
"LICENSE.txt",
|
27
|
+
"README.rdoc",
|
28
|
+
"Rakefile",
|
29
|
+
"VERSION",
|
30
|
+
"debilinguifier.gemspec",
|
31
|
+
"lib/debilinguifier.rb",
|
32
|
+
"spec/debilinguifier_spec.rb",
|
33
|
+
"spec/fixtures/examples.yml",
|
34
|
+
"spec/spec_helper.rb"
|
35
|
+
]
|
36
|
+
s.homepage = "http://github.com/apapamichalis/debilinguifier".freeze
|
37
|
+
s.licenses = ["MIT".freeze]
|
38
|
+
s.rubygems_version = "2.6.8".freeze
|
39
|
+
s.summary = "A [greek, latin] debilinguifier".freeze
|
40
|
+
|
41
|
+
if s.respond_to? :specification_version then
|
42
|
+
s.specification_version = 4
|
43
|
+
|
44
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
s.add_development_dependency(%q<shoulda>.freeze, [">= 0"])
|
46
|
+
s.add_development_dependency(%q<rdoc>.freeze, ["~> 3.12"])
|
47
|
+
s.add_development_dependency(%q<bundler>.freeze, ["~> 1.0"])
|
48
|
+
s.add_development_dependency(%q<juwelier>.freeze, ["~> 2.1.0"])
|
49
|
+
s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
|
50
|
+
s.add_development_dependency(%q<rspec>.freeze, ["~> 3.5"])
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
53
|
+
s.add_dependency(%q<rdoc>.freeze, ["~> 3.12"])
|
54
|
+
s.add_dependency(%q<bundler>.freeze, ["~> 1.0"])
|
55
|
+
s.add_dependency(%q<juwelier>.freeze, ["~> 2.1.0"])
|
56
|
+
s.add_dependency(%q<simplecov>.freeze, [">= 0"])
|
57
|
+
s.add_dependency(%q<rspec>.freeze, ["~> 3.5"])
|
58
|
+
end
|
59
|
+
else
|
60
|
+
s.add_dependency(%q<shoulda>.freeze, [">= 0"])
|
61
|
+
s.add_dependency(%q<rdoc>.freeze, ["~> 3.12"])
|
62
|
+
s.add_dependency(%q<bundler>.freeze, ["~> 1.0"])
|
63
|
+
s.add_dependency(%q<juwelier>.freeze, ["~> 2.1.0"])
|
64
|
+
s.add_dependency(%q<simplecov>.freeze, [">= 0"])
|
65
|
+
s.add_dependency(%q<rspec>.freeze, ["~> 3.5"])
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# This only works with capital AND detoned (i.e. unaccented) characters of the latin and greek charsets
|
2
|
+
|
3
|
+
# Rewrites capital-letter phrases that mix visually identical Greek and Latin
# characters so that, wherever possible, each phrase (or each word of it)
# uses a single charset.
#
# All methods are available directly on the module (+extend self+), e.g.
# <tt>DeBiLinguifier.dbl("...")</tt>.
module DeBiLinguifier
  extend self

  # Regexp character-class fragment with the non-letter characters that are
  # allowed in any phrase and that also act as word delimiters: whitespace,
  # digits and the punctuation . , @ - ( ) : / & '
  SYMBOLS = '\s\.\,\@\d\-\(\)\:\/\&\''.freeze

  # Latin capitals that have a look-alike Greek capital.
  LATIN_LOOKALIKES = 'ABEHIKMNOPTXYZ'.freeze

  # The Greek look-alikes of LATIN_LOOKALIKES, position by position
  # (A/Alpha, B/Beta, E/Epsilon, H/Eta, I/Iota, K/Kappa, M/Mu, N/Nu,
  # O/Omicron, P/Rho, T/Tau, X/Chi, Y/Upsilon, Z/Zeta).
  # Written as code points on purpose: the glyphs are indistinguishable from
  # the Latin ones, so escapes keep this table itself free of mix-ups.
  GREEK_LOOKALIKES =
    "\u0391\u0392\u0395\u0397\u0399\u039A\u039C\u039D\u039F\u03A1\u03A4\u03A7\u03A5\u0396".freeze

  # Character-class range covering the Greek capitals Alpha..Omega.
  GREEK_CAPITALS = "\u0391-\u03A9".freeze

  # NOTE: all patterns below are anchored with \A and \z (whole string), not
  # ^ and $ (which match at every line boundary in Ruby). With line anchors a
  # multi-line phrase was classified by whichever single line happened to match.

  # Matches a phrase whose characters can all be written with the greek charset
  GREEK_LOOKING_CHARS =
    Regexp.new("\\A[#{GREEK_CAPITALS}#{LATIN_LOOKALIKES}#{SYMBOLS}]+\\z").freeze
  # Matches a phrase whose characters can all be written with the latin charset
  LATIN_LOOKING_CHARS =
    Regexp.new("\\A[A-Z#{GREEK_LOOKALIKES}#{SYMBOLS}]+\\z").freeze
  # Matches a phrase already written only with the latin charset (plus SYMBOLS)
  LATIN_ALPHABET_PLUS_SYMBOLS = Regexp.new("\\A[A-Z#{SYMBOLS}]+\\z").freeze
  # Matches a phrase already written only with the greek charset (plus SYMBOLS)
  GREEK_ALPHABET_PLUS_SYMBOLS =
    Regexp.new("\\A[#{GREEK_CAPITALS}#{SYMBOLS}]+\\z").freeze

  # Zero-width split point right after every SYMBOLS character, so each word
  # keeps its trailing delimiter and joining the pieces restores the phrase.
  WORD_BOUNDARY = Regexp.new("(?<=[#{SYMBOLS}])").freeze

  # Only works with latin and greek charsets.
  # An input phrase can only be one of five things:
  # 1) Already only in greek or only in latin charset.
  # 2) Written in a mixed charset, but can be written with just the greek charset.
  # 3) Written in a mixed charset, but can be written with just the latin charset.
  # 4) Written in a mixed charset, but cannot be written with only one of the [greek, latin] charsets.
  #    In this case we split the phrase into words and apply the above rules to each word separately.
  #    If case 4 applies to a single word, there is nothing more we can do for it than return it "as is".
  # 5) Written in a mixed charset, but can be written either with just the greek charset or just the latin charset.
  #
  # Note: We are deliberately ignoring case 5, as it is of no use at the moment as a separate case.
  #       It is actually the intersection of cases 2 and 3. Using case 2 instead.
  # @param input [String] the string we want to de-bi-linguify (!)
  # @return [String] the de-bi-linguified string
  def dbl(input)
    return input if is_greek_only?(input) || is_latin_only?(input) # Case 1
    return return_in_greek(input) if can_write_only_greek?(input)  # Case 2
    return return_in_latin(input) if can_write_only_latin?(input)  # Case 3

    return_in_mixed_charset(input)                                 # Case 4
  end

  # Determine if the input phrase is already only in greek charset
  # @param input [String]
  # @return [Boolean]
  def is_greek_only?(input)
    input.match?(GREEK_ALPHABET_PLUS_SYMBOLS)
  end

  # Determine if the input phrase is already only in latin charset
  # @param input [String]
  # @return [Boolean]
  def is_latin_only?(input)
    input.match?(LATIN_ALPHABET_PLUS_SYMBOLS)
  end

  # Determine if the whole phrase can be written only with greek charset
  # @param input [String]
  # @return [Boolean]
  def can_write_only_greek?(input)
    input.match?(GREEK_LOOKING_CHARS)
  end

  # Determine if the whole phrase can be written only with latin charset
  # @param input [String]
  # @return [Boolean]
  def can_write_only_latin?(input)
    input.match?(LATIN_LOOKING_CHARS)
  end

  # Return the phrase using the greek characters only
  # (every latin look-alike capital is replaced by its greek twin)
  # @param input [String]
  # @return [String]
  def return_in_greek(input)
    input.tr(LATIN_LOOKALIKES, GREEK_LOOKALIKES)
  end

  # Return the phrase using the latin characters only
  # (every greek look-alike capital is replaced by its latin twin)
  # @param input [String]
  # @return [String]
  def return_in_latin(input)
    input.tr(GREEK_LOOKALIKES, LATIN_LOOKALIKES)
  end

  # Return the phrase using both charsets.
  # Splits the phrase into words on the SYMBOLS delimiters and applies dbl to
  # each word. A single word that cannot be fixed (e.g. it contains both a
  # greek-only and a latin-only letter) is returned as it was originally.
  # @param input [String]
  # @return [String]
  def return_in_mixed_charset(input)
    words = input.split(WORD_BOUNDARY)
    # Zero or one piece means there is nothing left to split: stop recursing.
    return input if words.length <= 1

    words.map { |word| dbl(word) }.join
  end
end
|
92
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'debilinguifier'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# Table-driven spec: every key of the fixture file is an input phrase and its
# value is the phrase DeBiLinguifier.dbl is expected to return for it.
describe DeBiLinguifier do
  context 'When trying to de-bi-linguify a word' do
    # Resolve the fixture relative to this spec file rather than the current
    # working directory, so the spec passes no matter where rspec is run from.
    let(:examples) { YAML.load_file(File.expand_path('fixtures/examples.yml', __dir__)) }

    it 'should return the expected result' do
      examples.each_pair do |input, output|
        # Fixtures are written in lower case for readability; the module only
        # handles capitals, so both sides are upcased before comparing.
        expect(DeBiLinguifier.dbl(input.upcase)).to eq(output.upcase)
      end
    end
  end
end
|
18
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# test case 1a
|
2
|
+
αγρος: αγρος
|
3
|
+
ο αγρος: ο αγρος
|
4
|
+
ο.αγρος: ο.αγρος
|
5
|
+
|
6
|
+
|
7
|
+
# test case 1b
|
8
|
+
force: force
|
9
|
+
the force: the force
|
10
|
+
the& force: the& force
|
11
|
+
|
12
|
+
|
13
|
+
# test case 2
|
14
|
+
enepγεια: ενεργεια
|
15
|
+
h enepγεια: η ενεργεια
|
16
|
+
η ενεργεia: η ενεργεια
|
17
|
+
η&ενεργεia.: η&ενεργεια.
|
18
|
+
|
19
|
+
|
20
|
+
# test case 3
|
21
|
+
ροwer: power
|
22
|
+
τηε ροwer: the power
|
23
|
+
the ροwer: the power
|
24
|
+
"&τηε.ροwer": "&the.power"
|
25
|
+
|
26
|
+
# test case 4
|
27
|
+
cψ: cψ
|
28
|
+
ψαλιδi s.α.: ψαλιδι s.α.
|
29
|
+
yεt another strange τεst.- sα' & aφοι: υετ another strange test.- sa' & αφοι
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# The generated `.rspec` file contains `--require spec_helper` which will cause
|
4
|
+
# this file to always be loaded, without a need to explicitly require it in any
|
5
|
+
# files.
|
6
|
+
#
|
7
|
+
# Given that it is always loaded, you are encouraged to keep this file as
|
8
|
+
# light-weight as possible. Requiring heavyweight dependencies from this file
|
9
|
+
# will add to the boot time of your test suite on EVERY test run, even for an
|
10
|
+
# individual file that may not need all of that loaded. Instead, consider making
|
11
|
+
# a separate helper file that requires the additional dependencies and performs
|
12
|
+
# the additional setup, and require it from the spec files that actually need
|
13
|
+
# it.
|
14
|
+
#
|
15
|
+
# The `.rspec` file also contains a few flags that are not defaults but that
|
16
|
+
# users commonly want.
|
17
|
+
#
|
18
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
19
|
+
RSpec.configure do |config|
|
20
|
+
# rspec-expectations config goes here. You can use an alternate
|
21
|
+
# assertion/expectation library such as wrong or the stdlib/minitest
|
22
|
+
# assertions if you prefer.
|
23
|
+
config.expect_with :rspec do |expectations|
|
24
|
+
# This option will default to `true` in RSpec 4. It makes the `description`
|
25
|
+
# and `failure_message` of custom matchers include text for helper methods
|
26
|
+
# defined using `chain`, e.g.:
|
27
|
+
# be_bigger_than(2).and_smaller_than(4).description
|
28
|
+
# # => "be bigger than 2 and smaller than 4"
|
29
|
+
# ...rather than:
|
30
|
+
# # => "be bigger than 2"
|
31
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
32
|
+
end
|
33
|
+
|
34
|
+
# rspec-mocks config goes here. You can use an alternate test double
|
35
|
+
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
36
|
+
config.mock_with :rspec do |mocks|
|
37
|
+
# Prevents you from mocking or stubbing a method that does not exist on
|
38
|
+
# a real object. This is generally recommended, and will default to
|
39
|
+
# `true` in RSpec 4.
|
40
|
+
mocks.verify_partial_doubles = true
|
41
|
+
end
|
42
|
+
|
43
|
+
# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
|
44
|
+
# have no way to turn it off -- the option exists only for backwards
|
45
|
+
# compatibility in RSpec 3). It causes shared context metadata to be
|
46
|
+
# inherited by the metadata hash of host groups and examples, rather than
|
47
|
+
# triggering implicit auto-inclusion in groups with matching metadata.
|
48
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
# Run specs in random order to surface order dependencies. If you find an
|
53
|
+
# order dependency and want to debug it, you can fix the order by providing
|
54
|
+
# the seed, which is printed after each run.
|
55
|
+
# --seed 1234
|
56
|
+
config.order = :random
|
57
|
+
|
58
|
+
|
59
|
+
end
|
metadata
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: debilinguifier
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- apapamichalis
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-03-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: shoulda
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rdoc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.12'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.12'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: juwelier
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.1.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.1.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.5'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.5'
|
97
|
+
description: The purpose of this gem is to return a phrase written using two charsets
|
98
|
+
due to user's mistake. The reason behind this is that we have a db we want to migrate
|
99
|
+
populated with such entries and we want to somehow sanitize it. The db contains
|
100
|
+
company and product names in capital letters (e.g. the user might have written "komπολοι".upcase
|
101
|
+
instead of "κομπολοι".upcase", resulting in a string that in capital letters seems
|
102
|
+
to be the same, but in practice is not)
|
103
|
+
email: dimxer@hotmail.com
|
104
|
+
executables: []
|
105
|
+
extensions: []
|
106
|
+
extra_rdoc_files:
|
107
|
+
- LICENSE.txt
|
108
|
+
- README.rdoc
|
109
|
+
files:
|
110
|
+
- ".document"
|
111
|
+
- ".rspec"
|
112
|
+
- Gemfile
|
113
|
+
- Gemfile.lock
|
114
|
+
- LICENSE.txt
|
115
|
+
- README.rdoc
|
116
|
+
- Rakefile
|
117
|
+
- VERSION
|
118
|
+
- debilinguifier.gemspec
|
119
|
+
- lib/debilinguifier.rb
|
120
|
+
- spec/debilinguifier_spec.rb
|
121
|
+
- spec/fixtures/examples.yml
|
122
|
+
- spec/spec_helper.rb
|
123
|
+
homepage: http://github.com/apapamichalis/debilinguifier
|
124
|
+
licenses:
|
125
|
+
- MIT
|
126
|
+
metadata: {}
|
127
|
+
post_install_message:
|
128
|
+
rdoc_options: []
|
129
|
+
require_paths:
|
130
|
+
- lib
|
131
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
requirements: []
|
142
|
+
rubyforge_project:
|
143
|
+
rubygems_version: 2.6.8
|
144
|
+
signing_key:
|
145
|
+
specification_version: 4
|
146
|
+
summary: A [greek, latin] debilinguifier
|
147
|
+
test_files: []
|