alphabets 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2679f375c1118915625e06811cf773a4d59707a6
4
+ data.tar.gz: 5375336c3b4d0002923547f5a9d41498d629722f
5
+ SHA512:
6
+ metadata.gz: 3ff7470fe10524e8b0080cc36c9eb7a2534f70f75a4f47ca1cf46698bb2cf68a8845b45a364c01a65469bfa3d42f7865172aff8217103766043675b30924bf7a
7
+ data.tar.gz: '018796bf5c80bc970458716cf297f53616fb2ee1a031e91fa184d5351483564760a688e283368d760838832350b683d6951c4ce857eaa168177269ae451c10dd'
data/HISTORY.md ADDED
@@ -0,0 +1,3 @@
1
+ ### 0.0.1 / 2019-08-13
2
+
3
+ * Everything is new. First release.
data/Manifest.txt ADDED
@@ -0,0 +1,13 @@
1
+ HISTORY.md
2
+ Manifest.txt
3
+ NOTES.md
4
+ README.md
5
+ Rakefile
6
+ lib/alphabets.rb
7
+ lib/alphabets/alphabets.rb
8
+ lib/alphabets/variants.rb
9
+ lib/alphabets/version.rb
10
+ test/helper.rb
11
+ test/test_downcase.rb
12
+ test/test_unaccent.rb
13
+ test/test_variants.rb
data/NOTES.md ADDED
@@ -0,0 +1,3 @@
1
+ # Notes
2
+
3
+ ## Todos
data/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # alphabets - helpers to downcase, unaccent and find spelling variants of names
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/alphabets](https://rubygems.org/gems/alphabets)
7
+ * rdoc :: [rubydoc.info/gems/alphabets](http://rubydoc.info/gems/alphabets)
8
+ * forum :: [opensport](http://groups.google.com/group/opensport)
9
+
10
+
11
+ ## Usage
12
+
13
+ To be done
14
+
15
+
16
+ ## License
17
+
18
+ The `alphabets` scripts are dedicated to the public domain.
19
+ Use it as you please with no restrictions whatsoever.
20
+
21
+
22
+ ## Questions? Comments?
23
+
24
+ Send them along to the
25
+ [Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
26
+ Thanks!
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require 'hoe'
2
+ require './lib/alphabets/version.rb'
3
+
4
+ Hoe.spec 'alphabets' do
5
+ # gem packaging spec declared via Hoe; the version is taken from Alphabet::VERSION
6
+ self.version = Alphabet::VERSION
7
+
8
+ self.summary = "alphabets - "
9
+ self.description = summary
10
+
11
+ self.urls = ['https://github.com/sportdb/sport.db']
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'opensport@googlegroups.com'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'HISTORY.md'
19
+
20
+ self.licenses = ['Public Domain']
21
+
22
+ self.extra_deps = []
23
+
24
+ self.spec_extras = {
25
+ :required_ruby_version => '>= 2.2.2'
26
+ }
27
+
28
+ end
data/lib/alphabets.rb ADDED
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+
3
+ require 'pp'
4
+
5
+
6
+ ###
7
+ # our own code
8
+ require 'alphabets/version' # let version always go first
9
+ require 'alphabets/alphabets'
10
+ require 'alphabets/variants'
11
+
12
+
13
+
14
+ ## "global" convenience helper - top-level shortcut that forwards to Alphabet.downcase_i18n
15
+ def downcase_i18n( name )
16
+ Alphabet.downcase_i18n( name )
17
+ end
18
+ ## "global" convenience helper - top-level shortcut that forwards to Alphabet.unaccent
19
+ def unaccent( name )
20
+ Alphabet.unaccent( name ) ## using "default" language character mapping / table
21
+ end
22
+ ## "global" convenience helper - top-level shortcut that forwards to Variant.find
23
+ def variants( name ) ## todo/check: rename to unaccent_variants or unaccent_names - why? why not?
24
+ Variant.find( name )
25
+ end
26
+
27
+
28
+ ## convenience aliases - Abc, Alphabets and Alpha all reference the very same Alphabet class
29
+ Abc = Alphabet
30
+ Alphabets = Alphabet
31
+ Alpha = Alphabet
32
+
33
+ ## note: the banner line below gets printed to stdout whenever this file is loaded
34
+ puts Alphabet.banner # say hello
@@ -0,0 +1,186 @@
1
+ # encoding: utf-8
2
+
3
##
# Helpers for names written in (latin-based) int'l alphabets:
# character frequency tables, accent / diacritic removal ("unaccent")
# and a table-driven downcase.
class Alphabet   ## todo/fix: add alias Abc and Alpha too? why? why not?

  # Count how often every character occurs in name.
  #
  # Returns a Hash (character => number of occurrences);
  # characters that do not occur report 0.
  def self.frequency_table( name )   ## todo/check: use/rename to char_frequency_table
    freq = Hash.new(0)
    name.each_char { |ch| freq[ch] += 1 }
    freq
  end


  # Sum up the occurrences of the given characters.
  #
  # freq             - frequency table (see frequency_table)
  # mapping_or_chars - a mapping Hash (its keys are used) or a list of characters
  #
  # Returns the total count as an Integer.
  def self.count( freq, mapping_or_chars )
    ## todo/fix: check for is_a? Array and if is String split into Array (on char at a time?) - why? why not?
    chars = mapping_or_chars.is_a?( Hash ) ? mapping_or_chars.keys : mapping_or_chars

    ## note: fetch with a default of 0 - works for plain hashes (without a default value) too
    chars.reduce(0) { |sum,ch| sum + freq.fetch( ch, 0 ) }
  end


  # Translate name character by character using mapping;
  # characters without an entry are passed along as is.
  #
  # Returns a new String in the encoding of name.
  def self.tr( name, mapping )
    ## note: do NOT use a bare String.new here - it starts out as binary (ASCII-8BIT)
    ##   and stays binary as long as only ascii characters get appended
    buf = String.new( '' ).force_encoding( name.encoding )
    name.each_char { |ch| buf << ( mapping[ch] || ch ) }
    buf
  end


  # Binds a character mapping so that callers can count and unaccent
  # without passing the table around.
  class Unaccenter  #Worker  ## todo/change - find a better name - why? why not?
    def initialize( mapping )
      @mapping = mapping
    end

    # freq is a frequency table (see Alphabet.frequency_table) - NOT the name itself
    def count( freq )     Alphabet.count( freq, @mapping ); end
    def unaccent( name )  Alphabet.tr( name, @mapping );    end
  end # class Unaccenter


  # Look up the (cached) unaccenter for a language key.
  #
  # Only :de has a mapping of its own; every other key gets the default mapping.
  def self.find_unaccenter( key )
    if key == :de
      @de ||= Unaccenter.new( UNACCENT_DE )
    else
      ## use uni(versal) or unicode or something - why? why not?
      ## use all or int'l (international) - why? why not?
      ## use en (english) - why? why not?
      @default ||= Unaccenter.new( UNACCENT )
    end
  end

  # Remove accents / diacritics and unfold ligatures using the default mapping.
  def self.unaccent( name )
    find_unaccenter( :default ).unaccent( name )   ## any key other than :de selects the default
  end


  # Our very own downcase for int'l characters / letters (see DOWNCASE).
  def self.downcase_i18n( name )
    tr( name, DOWNCASE )
  end
  ## add downcase_uni - universal/unicode - why? why not?


  ## "simple" unaccent (remove accents/diacritics and unfold ligatures) translation table / mapping
  UNACCENT = {
    'Ä'=>'A',  'ä'=>'a',
    'Á'=>'A',  'á'=>'a',
               'à'=>'a',
               'ã'=>'a',
               'â'=>'a',
    'Å'=>'A',  'å'=>'a',
               'æ'=>'ae',
               'ā'=>'a',
               'ă'=>'a',
               'ą'=>'a',

    'Ç'=>'C',  'ç'=>'c',
               'ć'=>'c',
    'Č'=>'C',  'č'=>'c',

    'É'=>'E',  'é'=>'e',
               'è'=>'e',
               'ê'=>'e',
               'ë'=>'e',
               'ė'=>'e',
               'ę'=>'e',

               'ğ'=>'g',

    'İ'=>'I',
    'Í'=>'I',  'í'=>'i',
               'î'=>'i',
               'ī'=>'i',
               'ı'=>'i',

    'Ł'=>'L',  'ł'=>'l',

               'ñ'=>'n',
               'ń'=>'n',
               'ň'=>'n',

    'Ö'=>'O',  'ö'=>'o',
               'ó'=>'o',
               'õ'=>'o',
               'ô'=>'o',
               'ø'=>'o',
               'ő'=>'o',

               'ř'=>'r',

    'Ś'=>'S',  'ś'=>'s',
    'Ş'=>'S',  'ş'=>'s',
    'Š'=>'S',  'š'=>'s',
               'ș'=>'s',   ## U+0219
               'ß'=>'ss',

               'ţ'=>'t',   ## U+0163
               'ț'=>'t',   ## U+021B
               'þ'=>'th',  #### fix!!!! use p - why? why not?

    'Ü'=>'U',  'ü'=>'u',
    'Ú'=>'U',  'ú'=>'u',
               'ū'=>'u',

               'ý'=>'y',

               'ź'=>'z',
               'ż'=>'z',
    'Ž'=>'Z',  'ž'=>'z',
  }.freeze


  ## de,at,ch translation for umlauts
  UNACCENT_DE = {
    'Ä'=>'Ae',  'ä'=>'ae',    ### Use AE, OE, UE and NOT Ae, Oe, Ue - why? why not? e.g.VÖST => VOEST or Ö => OE
    'Ö'=>'Oe',  'ö'=>'oe',
    'Ü'=>'Ue',  'ü'=>'ue',
                'ß'=>'ss',
  }.freeze

  ## add UNACCENT_ES - why? why not?  is Espanyol catalan spelling or spanish (castillian)?
  #    'ñ'=>'ny',    ## e.g. Español => Espanyol

  ## A-Z plus the accented uppercase letters known to UNACCENT
  DOWNCASE = ('A'..'Z').reduce({}) do |h,ch|
    h[ch] = ch.downcase
    h
  end.merge({
    'Ä'=>'ä',
    'Á'=>'á',
    'Å'=>'å',

    'Ç'=>'ç',
    'Č'=>'č',

    'É'=>'é',

    'İ'=>'i',   ## dotted capital I - use the plain i (matches 'İ'=>'I' in UNACCENT)
    'Í'=>'í',

    'Ł'=>'ł',

    'Ö'=>'ö',

    'Ś'=>'ś',
    'Ş'=>'ş',
    'Š'=>'š',

    'Ü'=>'ü',
    'Ú'=>'ú',

    'Ž'=>'ž',
  }).freeze

end # class Alphabet
@@ -0,0 +1,72 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ class Variant ## (spelling) variant finder / builder for names
5
+ ## the two unaccenters (character mappings) tried by find - looked up once via Alphabet.find_unaccenter
6
+ EN_UNACCENTER = Alphabet.find_unaccenter( :en ) ## assume english (en) as default for now - change to universal/int'l/default or something - why? why not?
7
+ DE_UNACCENTER = Alphabet.find_unaccenter( :de )
8
+ ## returns an array with the unaccented spellings of name (en first, then de) - empty if name has no mapped characters
9
+ def self.find( name )
10
+ alt_names = []
11
+
12
+ freq = Alphabet.frequency_table( name )
13
+
14
+ en = EN_UNACCENTER
15
+ if en.count( freq ) > 0 # check if includes äöü (that is, character with accents or diacritics) etc.
16
+ alt_names << en.unaccent( name )
17
+ end
18
+
19
+ de = DE_UNACCENTER
20
+ if de.count( freq ) > 0
21
+ alt_names << de.unaccent( name )
22
+ end
23
+
24
+ ## drop duplicates - e.g. Preußen yields Preussen from both mappings
25
+ alt_names = alt_names.uniq
26
+ alt_names
27
+ end
28
+
29
+ end # class Variant
30
+
31
+
32
+
33
+ ######################################
34
+ # experimental class - use (just) Name or NameQ or NameVariant or NameAnalyzer/Query or similar - why? why not?
35
+ ## let's wait for now with usage - let's add more methods as we go along and find more - why? why not?
36
+ class NameQuery ## wraps a single name; caches its character frequency table and its spelling variants
37
+ def initialize( name )
38
+ @name = name
39
+ end
40
+ ## character frequency table of the name (via Alphabet.frequency_table) - computed once, then cached
41
+ def frequency_table
42
+ @freq ||= Alphabet.frequency_table( @name )
43
+ end
44
+ ## unaccented spelling variants of the name - computed once, then cached
45
+ def variants
46
+ @variants ||= find_variants
47
+ end
48
+ ## note: a bare private only changes the visibility of methods defined below it - the two constants stay public
49
+ private
50
+ EN_UNACCENTER = Alphabet.find_unaccenter( :en ) ## assume english (en) as default for now - change to universal/int'l/default or something - why? why not?
51
+ DE_UNACCENTER = Alphabet.find_unaccenter( :de )
52
+
53
+ def find_variants
54
+ alt_names = []
55
+
56
+ freq = frequency_table
57
+
58
+ en = EN_UNACCENTER
59
+ if en.count( freq ) > 0 # check if includes äöü (that is, character with accents or diacritics) etc.
60
+ alt_names << en.unaccent( @name )
61
+ end
62
+
63
+ de = DE_UNACCENTER
64
+ if de.count( freq ) > 0
65
+ alt_names << de.unaccent( @name )
66
+ end
67
+
68
+ ## drop duplicates - e.g. Preußen yields Preussen from both mappings
69
+ alt_names = alt_names.uniq
70
+ alt_names
71
+ end
72
+ end ## class NameQuery
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## todo/check: use a module Alphabets with s to keep version and banner separate - why? why not?
5
+
6
+ class Alphabet
7
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
+ MINOR = 0
9
+ PATCH = 1
10
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
11
+ ## version string (MAJOR.MINOR.PATCH)
12
+ def self.version
13
+ VERSION
14
+ end
15
+ ## one-line banner with the library version plus ruby version, release date and platform
16
+ def self.banner
17
+ "alphabets/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18
+ end
19
+ ## absolute path three directory levels up from this file
20
+ def self.root
21
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22
+ end
23
+ end # class Alphabet
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+ ## minitest setup
4
+
5
+ require 'minitest/autorun'
6
+
7
+
8
+ ## our own code
9
+
10
+ require 'alphabets'
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_downcase.rb
6
+
7
+
8
+ require 'helper'
9
+
10
# Checks the global downcase_i18n helper with plain A-Z and german umlauts.
#
# note: inherits from Minitest::Test (NOT MiniTest::Test) -
#   the legacy MiniTest alias is no longer loaded by default in current minitest versions.
class TestDowncase < Minitest::Test

  def test_downcase_i18n
    assert_equal 'abcdefghijklmnopqrstuvwxyz', downcase_i18n( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' )
    assert_equal 'äöü',                        downcase_i18n( 'ÄÖÜ' )
    assert_equal 'köln',                       downcase_i18n( 'KÖLN' )
  end

end # class TestDowncase
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_unaccent.rb
6
+
7
+
8
+ require 'helper'
9
+
10
# Checks the global unaccent helper (default character mapping)
# with german and spanish club names.
#
# note: inherits from Minitest::Test (NOT MiniTest::Test) -
#   the legacy MiniTest alias is no longer loaded by default in current minitest versions.
class TestUnaccent < Minitest::Test

  def test_de
    assert_equal 'Augsburg', unaccent( 'Augsburg' )

    assert_equal 'Koln',       unaccent( 'Köln' )
    assert_equal '1. FC Koln', unaccent( '1. FC Köln' )

    assert_equal 'Bayern Munchen',       unaccent( 'Bayern München' )
    assert_equal 'F. Dusseldorf',        unaccent( 'F. Düsseldorf' )
    assert_equal 'Preussen',             unaccent( 'Preußen' )
    assert_equal 'Munster Preussen',     unaccent( 'Münster Preußen' )
    assert_equal 'Rot-Weiss Oberhausen', unaccent( 'Rot-Weiß Oberhausen' )

    assert_equal 'St. Polten', unaccent( 'St. Pölten' )
  end

  def test_es
    assert_equal 'Madrid', unaccent( 'Madrid' )

    assert_equal 'Atletico Madrid', unaccent( 'Atlético Madrid' )
    assert_equal 'Ecija Balompie',  unaccent( 'Écija Balompié' )
    assert_equal 'La Coruna',       unaccent( 'La Coruña' )
    assert_equal 'Almeria',         unaccent( 'Almería' )
  end

end # class TestUnaccent
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_variants.rb
6
+
7
+
8
+ require 'helper'
9
+
10
# Checks the global variants helper: names without mapped characters yield no variants,
# names with umlauts yield the default spelling followed by the german (ae/oe/ue) spelling.
#
# note: inherits from Minitest::Test (NOT MiniTest::Test) -
#   the legacy MiniTest alias is no longer loaded by default in current minitest versions.
class TestVariants < Minitest::Test

  def test_de
    assert_equal [], variants( 'Augsburg' )

    assert_equal ['Koln', 'Koeln'],             variants( 'Köln' )
    assert_equal ['1. FC Koln', '1. FC Koeln'], variants( '1. FC Köln' )

    assert_equal ['Bayern Munchen', 'Bayern Muenchen'],     variants( 'Bayern München' )
    assert_equal ['F. Dusseldorf', 'F. Duesseldorf'],       variants( 'F. Düsseldorf' )
    assert_equal ['Preussen'],                              variants( 'Preußen' )
    assert_equal ['Munster Preussen', 'Muenster Preussen'], variants( 'Münster Preußen' )
    assert_equal ['Rot-Weiss Oberhausen'],                  variants( 'Rot-Weiß Oberhausen' )

    assert_equal ['St. Polten', 'St. Poelten'], variants( 'St. Pölten' )
  end

  def test_es
    assert_equal [], variants( 'Madrid' )

    assert_equal ['Atletico Madrid'], variants( 'Atlético Madrid' )
    assert_equal ['Ecija Balompie'],  variants( 'Écija Balompié' )
    assert_equal ['La Coruna'],       variants( 'La Coruña' )
    assert_equal ['Almeria'],         variants( 'Almería' )
  end

end # class TestVariants
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: alphabets
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-08-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rdoc
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '4.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: hoe
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.16'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.16'
41
+ description: 'alphabets - '
42
+ email: opensport@googlegroups.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files:
46
+ - HISTORY.md
47
+ - Manifest.txt
48
+ - NOTES.md
49
+ - README.md
50
+ files:
51
+ - HISTORY.md
52
+ - Manifest.txt
53
+ - NOTES.md
54
+ - README.md
55
+ - Rakefile
56
+ - lib/alphabets.rb
57
+ - lib/alphabets/alphabets.rb
58
+ - lib/alphabets/variants.rb
59
+ - lib/alphabets/version.rb
60
+ - test/helper.rb
61
+ - test/test_downcase.rb
62
+ - test/test_unaccent.rb
63
+ - test/test_variants.rb
64
+ homepage: https://github.com/sportdb/sport.db
65
+ licenses:
66
+ - Public Domain
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options:
70
+ - "--main"
71
+ - README.md
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 2.2.2
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubyforge_project:
86
+ rubygems_version: 2.5.2
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: alphabets -
90
+ test_files: []