alphabets 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2679f375c1118915625e06811cf773a4d59707a6
4
+ data.tar.gz: 5375336c3b4d0002923547f5a9d41498d629722f
5
+ SHA512:
6
+ metadata.gz: 3ff7470fe10524e8b0080cc36c9eb7a2534f70f75a4f47ca1cf46698bb2cf68a8845b45a364c01a65469bfa3d42f7865172aff8217103766043675b30924bf7a
7
+ data.tar.gz: '018796bf5c80bc970458716cf297f53616fb2ee1a031e91fa184d5351483564760a688e283368d760838832350b683d6951c4ce857eaa168177269ae451c10dd'
data/HISTORY.md ADDED
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2019-08-13
2
+
3
+ * Everything is new. First release.
data/Manifest.txt ADDED
@@ -0,0 +1,13 @@
1
+ HISTORY.md
2
+ Manifest.txt
3
+ NOTES.md
4
+ README.md
5
+ Rakefile
6
+ lib/alphabets.rb
7
+ lib/alphabets/alphabets.rb
8
+ lib/alphabets/variants.rb
9
+ lib/alphabets/version.rb
10
+ test/helper.rb
11
+ test/test_downcase.rb
12
+ test/test_unaccent.rb
13
+ test/test_variants.rb
data/NOTES.md ADDED
@@ -0,0 +1,3 @@
1
+ # Notes
2
+
3
+ ## Todos
data/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # alphabets - alphabet helpers (downcase, unaccent and spelling variants) for names
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/alphabets](https://rubygems.org/gems/alphabets)
7
+ * rdoc :: [rubydoc.info/gems/alphabets](http://rubydoc.info/gems/alphabets)
8
+ * forum :: [opensport](http://groups.google.com/group/opensport)
9
+
10
+
11
+ ## Usage
12
+
13
+ To be done
14
+
15
+
16
+ ## License
17
+
18
+ The `alphabets` scripts are dedicated to the public domain.
19
+ Use them as you please with no restrictions whatsoever.
20
+
21
+
22
+ ## Questions? Comments?
23
+
24
+ Send them along to the
25
+ [Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
26
+ Thanks!
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require 'hoe'
2
+ require './lib/alphabets/version.rb'
3
+
4
+ Hoe.spec 'alphabets' do
5
+
6
+ self.version = Alphabet::VERSION   # version is defined once in lib/alphabets/version.rb
7
+
8
+ self.summary = "alphabets - "
9
+ self.description = summary   # description simply reuses the summary text
10
+
11
+ self.urls = ['https://github.com/sportdb/sport.db']
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'opensport@googlegroups.com'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'HISTORY.md'
19
+
20
+ self.licenses = ['Public Domain']
21
+
22
+ self.extra_deps = []   # no runtime dependencies
23
+
24
+ self.spec_extras = {
25
+ :required_ruby_version => '>= 2.2.2'
26
+ }
27
+
28
+ end
data/lib/alphabets.rb ADDED
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+
3
+ require 'pp'
4
+
5
+
6
+ ###
7
+ # our own code
8
+ require 'alphabets/version' # let version always go first
9
+ require 'alphabets/alphabets'
10
+ require 'alphabets/variants'
11
+
12
+
13
+
14
+ ## add "global" convenience helpers - top-level methods that simply delegate to Alphabet / Variant
15
+ def downcase_i18n( name )
16
+ Alphabet.downcase_i18n( name )
17
+ end
18
+
19
+ def unaccent( name )
20
+ Alphabet.unaccent( name ) ## using "default" language character mapping / table (Alphabet::UNACCENT)
21
+ end
22
+
23
+ def variants( name ) ## returns the list of unaccented spellings; todo/check: rename to unaccent_variants or unaccent_names - why? why not?
24
+ Variant.find( name )
25
+ end
26
+
27
+
28
+ ## add convenience aliases - Abc, Alphabets and Alpha all refer to the very same Alphabet class
29
+ Abc = Alphabet
30
+ Alphabets = Alphabet
31
+ Alpha = Alphabet
32
+
33
+
34
+ puts Alphabet.banner # say hello - note: prints the version banner to stdout every time this file gets loaded
@@ -0,0 +1,186 @@
1
+ # encoding: utf-8
2
+
3
class Alphabet   ## todo/fix: add alias Abc and Alpha too? why? why not?

  ## Calculate the frequency table of letters, digits, etc. in name.
  ##   Returns a hash (with a default of 0) mapping every character
  ##   to the number of times it occurs.
  def self.frequency_table( name )   ## todo/check: use/rename to char_frequency_table
    name.each_char.each_with_object( Hash.new(0) ) do |ch,freq|
      freq[ch] += 1
    end
  end


  ## Sum up the occurrences (looked up in the frequency table freq)
  ##  of all characters listed in mapping_or_chars.
  ##
  ##  mapping_or_chars may be
  ##   - a Hash   - its keys are used as the list of characters
  ##   - a String - gets split into its characters
  ##   - anything else is assumed to be an array/list of characters
  ##
  ##  Characters missing from freq count as 0
  ##   (even if freq is a plain hash without a default).
  def self.count( freq, mapping_or_chars )
    chars = case mapping_or_chars
            when Hash   then mapping_or_chars.keys
            when String then mapping_or_chars.chars
            else             mapping_or_chars   ## assume it's an array/list of characters
            end

    chars.reduce(0) { |sum,ch| sum + freq[ch].to_i }
  end


  ## Translate name character by character using mapping;
  ##  characters without an entry are copied as is.
  ##  A replacement may be longer than one character (e.g. 'ß'=>'ss').
  def self.tr( name, mapping )
    ## note: a plain String.new is ASCII-8BIT (binary) and stays binary
    ##   if only ascii characters get appended (e.g. Köln => Koln);
    ##   start with the encoding of the input instead
    buf = String.new( '' ).force_encoding( name.encoding )
    name.each_char do |ch|
      buf << (mapping[ch] || ch)
    end
    buf
  end


  class Unaccenter  #Worker   ## todo/change - find a better name - why? why not?
    def initialize( mapping )
      @mapping = mapping
    end

    ## freq is a frequency table (see Alphabet.frequency_table) - NOT a name/string
    def count( freq )
      Alphabet.count( freq, @mapping )
    end

    def unaccent( name )
      Alphabet.tr( name, @mapping )
    end
  end # class Unaccent Worker


  ## Return the (cached) unaccenter for key;
  ##   :de gets the german umlaut table; every other key gets the default table.
  def self.find_unaccenter( key )
    if key == :de
      @de ||= Unaccenter.new( UNACCENT_DE )
    else
      ## use uni(versal) or unicode or something - why? why not?
      ## use all or int'l (international) - why? why not?
      ## use en (english) - why? why not?
      @default ||= Unaccenter.new( UNACCENT )
    end
  end

  ## Remove accents/diacritics using the "default" mapping table.
  def self.unaccent( name )
    find_unaccenter( :default ).unaccent( name )
  end


  def self.downcase_i18n( name )    ## our very own downcase for int'l characters / letters
    tr( name, DOWNCASE )
  end
  ## add downcase_uni - universal/unicode - why? why not?


  ## "simple" unaccent (remove accents/diacritics and unfold ligatures) translation table / mapping
  UNACCENT = {
    'Ä'=>'A',  'ä'=>'a',
    'Á'=>'A',  'á'=>'a',
               'à'=>'a',
               'ã'=>'a',
               'â'=>'a',
    'Å'=>'A',  'å'=>'a',
               'æ'=>'ae',
               'ā'=>'a',
               'ă'=>'a',
               'ą'=>'a',

    'Ç'=>'C',  'ç'=>'c',
               'ć'=>'c',
    'Č'=>'C',  'č'=>'c',

    'É'=>'E',  'é'=>'e',
               'è'=>'e',
               'ê'=>'e',
               'ë'=>'e',
               'ė'=>'e',
               'ę'=>'e',

               'ğ'=>'g',

    'İ'=>'I',
    'Í'=>'I',  'í'=>'i',
               'î'=>'i',
               'ī'=>'i',
               'ı'=>'i',

    'Ł'=>'L',  'ł'=>'l',

               'ñ'=>'n',
               'ń'=>'n',
               'ň'=>'n',

    'Ö'=>'O',  'ö'=>'o',
               'ó'=>'o',
               'õ'=>'o',
               'ô'=>'o',
               'ø'=>'o',
               'ő'=>'o',

               'ř'=>'r',

    'Ś'=>'S',
    'Ş'=>'S',  'ş'=>'s',
    'Š'=>'S',  'š'=>'s',
               'ș'=>'s',   ## U+0219
               'ß'=>'ss',

               'ţ'=>'t',   ## U+0163
               'ț'=>'t',   ## U+021B
               'þ'=>'th',  #### fix!!!! use p - why? why not?

    'Ü'=>'U',  'ü'=>'u',
    'Ú'=>'U',  'ú'=>'u',
               'ū'=>'u',

               'ý'=>'y',

               'ź'=>'z',
               'ż'=>'z',
    'Ž'=>'Z',  'ž'=>'z',
  }.freeze


  ## de,at,ch translation for umlauts
  UNACCENT_DE = {
    'Ä'=>'Ae',  'ä'=>'ae',   ### Use AE, OE, UE and NOT Ae, Oe, Ue - why? why not? e.g.VÖST => VOEST or Ö => OE
    'Ö'=>'Oe',  'ö'=>'oe',
    'Ü'=>'Ue',  'ü'=>'ue',
                'ß'=>'ss',
  }.freeze

  ## add UNACCENT_ES - why? why not? is Espanyol catalan spelling or spanish (castillian)?
  #  'ñ'=>'ny',    ## e.g. Español => Espanyol

  ## downcase table: A-Z plus the uppercase int'l letters also listed in UNACCENT
  DOWNCASE = ('A'..'Z').each_with_object({}) { |ch,h| h[ch] = ch.downcase }.merge(
    'Ä'=>'ä',
    'Á'=>'á',
    'Å'=>'å',

    'Ç'=>'ç',
    'Č'=>'č',

    'É'=>'é',

    'İ'=>'i',   ## capital i with dot above (U+0130) - lowercase is the plain (dotted) i
    'Í'=>'í',

    'Ł'=>'ł',

    'Ö'=>'ö',

    'Ś'=>'ś',
    'Ş'=>'ş',
    'Š'=>'š',

    'Ü'=>'ü',
    'Ú'=>'ú',

    'Ž'=>'ž',
  ).freeze

end # class Alphabet
@@ -0,0 +1,72 @@
1
+ # encoding: utf-8
2
+
3
+
4
class Variant    ## (spelling) variant finder / builder for names

  ## assume english (en) as default for now - change to universal/int'l/default or something - why? why not?
  ##   note: any key other than :de resolves to the default unaccent table
  EN_UNACCENTER = Alphabet.find_unaccenter( :en )
  DE_UNACCENTER = Alphabet.find_unaccenter( :de )

  ## Return all alternative (unaccented) spellings of name.
  ##   An unaccenter only contributes a spelling if name includes
  ##   at least one character of its mapping table (e.g. äöü);
  ##   duplicates get removed, e.g. Preußen yields Preussen once (not 2x).
  ##   Returns an empty array for names without any accents/diacritics.
  def self.find( name )
    freq = Alphabet.frequency_table( name )

    [EN_UNACCENTER, DE_UNACCENTER]
      .select { |unaccenter| unaccenter.count( freq ) > 0 }
      .map    { |unaccenter| unaccenter.unaccent( name ) }
      .uniq
  end

end # class Variant
30
+
31
+
32
+
33
######################################
# experimental class - use (just) Name or NameQ or NameVariant or NameAnalyzer/Query or similar - why? why not?
##  let's wait for now with usage - let's add more methods as we go along and find more - why? why not?
class NameQuery
  ## assume english (en) as default for now - change to universal/int'l/default or something - why? why not?
  ##   note: constants are NOT affected by the private keyword; keep them up here
  EN_UNACCENTER = Alphabet.find_unaccenter( :en )
  DE_UNACCENTER = Alphabet.find_unaccenter( :de )

  def initialize( name )
    @name = name
  end

  ## character frequency table of the name (calculated once on first use)
  def frequency_table
    @freq ||= Alphabet.frequency_table( @name )
  end

  ## alternative (unaccented) spellings of the name (calculated once on first use)
  def variants
    @variants ||= find_variants
  end

private

  ## An unaccenter only contributes a spelling if the name includes
  ##  at least one character of its mapping table (e.g. äöü);
  ##  duplicates get removed, e.g. Preußen yields Preussen once (not 2x).
  def find_variants
    freq = frequency_table

    [EN_UNACCENTER, DE_UNACCENTER]
      .select { |unaccenter| unaccenter.count( freq ) > 0 }
      .map    { |unaccenter| unaccenter.unaccent( @name ) }
      .uniq
  end
end  ## class NameQuery
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## todo/check: use a module Alphabets with s to keep version and banner separate - why? why not?
5
+
6
class Alphabet
  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 0
  PATCH = 1
  ## frozen - the version string is a shared constant and must not get changed in place
  VERSION = [MAJOR,MINOR,PATCH].join('.').freeze

  ## the version string e.g. 0.0.1
  def self.version
    VERSION
  end

  ## one-line hello message with the library version and ruby version, release date and platform
  def self.banner
    "alphabets/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## absolute path three directory levels up from this file
  ##   (that is, the gem root if this file lives in lib/alphabets/)
  def self.root
    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
  end
end # class Alphabet
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+ ## minitest setup
4
+
5
+ require 'minitest/autorun'
6
+
7
+
8
+ ## our own code
9
+
10
+ require 'alphabets'
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_downcase.rb
6
+
7
+
8
+ require 'helper'
9
+
10
## note: use Minitest (NOT MiniTest) - the old MiniTest compatibility alias
##   is no longer defined by default in newer minitest versions
class TestDowncase < Minitest::Test

  ## downcase_i18n must handle plain A-Z as well as int'l letters (e.g. umlauts)
  def test_downcase_i18n
    assert_equal 'abcdefghijklmnopqrstuvwxyz', downcase_i18n( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' )
    assert_equal 'äöü',                        downcase_i18n( 'ÄÖÜ' )
    assert_equal 'köln',                       downcase_i18n( 'KÖLN' )
  end

end # class TestDowncase
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_unaccent.rb
6
+
7
+
8
+ require 'helper'
9
+
10
## note: use Minitest (NOT MiniTest) - the old MiniTest compatibility alias
##   is no longer defined by default in newer minitest versions
class TestUnaccent < Minitest::Test

  ## german names - the default table drops the umlaut dots (ö => o) and unfolds ß => ss
  def test_de
    assert_equal 'Augsburg',             unaccent( 'Augsburg' )

    assert_equal 'Koln',                 unaccent( 'Köln' )
    assert_equal '1. FC Koln',           unaccent( '1. FC Köln' )

    assert_equal 'Bayern Munchen',       unaccent( 'Bayern München' )
    assert_equal 'F. Dusseldorf',        unaccent( 'F. Düsseldorf' )
    assert_equal 'Preussen',             unaccent( 'Preußen' )
    assert_equal 'Munster Preussen',     unaccent( 'Münster Preußen' )
    assert_equal 'Rot-Weiss Oberhausen', unaccent( 'Rot-Weiß Oberhausen' )

    assert_equal 'St. Polten',           unaccent( 'St. Pölten' )
  end

  ## spanish names
  def test_es
    assert_equal 'Madrid',          unaccent( 'Madrid' )

    assert_equal 'Atletico Madrid', unaccent( 'Atlético Madrid' )
    assert_equal 'Ecija Balompie',  unaccent( 'Écija Balompié' )
    assert_equal 'La Coruna',       unaccent( 'La Coruña' )
    assert_equal 'Almeria',         unaccent( 'Almería' )
  end

end # class TestUnaccent
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_variants.rb
6
+
7
+
8
+ require 'helper'
9
+
10
## note: use Minitest (NOT MiniTest) - the old MiniTest compatibility alias
##   is no longer defined by default in newer minitest versions
class TestVariants < Minitest::Test

  ## german names - expect the default spelling (ö => o) first, then the german one (ö => oe);
  ##   identical spellings (e.g. ß => ss in both tables) get listed once only
  def test_de
    assert_equal [],                                        variants( 'Augsburg' )

    assert_equal ['Koln', 'Koeln'],                         variants( 'Köln' )
    assert_equal ['1. FC Koln', '1. FC Koeln'],             variants( '1. FC Köln' )

    assert_equal ['Bayern Munchen', 'Bayern Muenchen'],     variants( 'Bayern München' )
    assert_equal ['F. Dusseldorf', 'F. Duesseldorf'],       variants( 'F. Düsseldorf' )
    assert_equal ['Preussen'],                              variants( 'Preußen' )
    assert_equal ['Munster Preussen', 'Muenster Preussen'], variants( 'Münster Preußen' )
    assert_equal ['Rot-Weiss Oberhausen'],                  variants( 'Rot-Weiß Oberhausen' )

    assert_equal ['St. Polten', 'St. Poelten'],             variants( 'St. Pölten' )
  end

  ## spanish names - only the default table applies
  def test_es
    assert_equal [],                  variants( 'Madrid' )

    assert_equal ['Atletico Madrid'], variants( 'Atlético Madrid' )
    assert_equal ['Ecija Balompie'],  variants( 'Écija Balompié' )
    assert_equal ['La Coruna'],       variants( 'La Coruña' )
    assert_equal ['Almeria'],         variants( 'Almería' )
  end

end # class TestVariants
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: alphabets
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-08-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rdoc
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '4.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hoe
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.16'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.16'
41
+ description: 'alphabets - '
42
+ email: opensport@googlegroups.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files:
46
+ - HISTORY.md
47
+ - Manifest.txt
48
+ - NOTES.md
49
+ - README.md
50
+ files:
51
+ - HISTORY.md
52
+ - Manifest.txt
53
+ - NOTES.md
54
+ - README.md
55
+ - Rakefile
56
+ - lib/alphabets.rb
57
+ - lib/alphabets/alphabets.rb
58
+ - lib/alphabets/variants.rb
59
+ - lib/alphabets/version.rb
60
+ - test/helper.rb
61
+ - test/test_downcase.rb
62
+ - test/test_unaccent.rb
63
+ - test/test_variants.rb
64
+ homepage: https://github.com/sportdb/sport.db
65
+ licenses:
66
+ - Public Domain
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options:
70
+ - "--main"
71
+ - README.md
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 2.2.2
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 2.5.2
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: alphabets -
90
+ test_files: []