RubyGems - alphabets - Versions diffs - 0.0.1 - Mend

alphabets 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 2679f375c1118915625e06811cf773a4d59707a6
+  data.tar.gz: 5375336c3b4d0002923547f5a9d41498d629722f
+SHA512:
+  metadata.gz: 3ff7470fe10524e8b0080cc36c9eb7a2534f70f75a4f47ca1cf46698bb2cf68a8845b45a364c01a65469bfa3d42f7865172aff8217103766043675b30924bf7a
+  data.tar.gz: '018796bf5c80bc970458716cf297f53616fb2ee1a031e91fa184d5351483564760a688e283368d760838832350b683d6951c4ce857eaa168177269ae451c10dd'

data/HISTORY.md ADDED Viewed

@@ -0,0 +1,3 @@
+### 0.0.1 / 2019-08-13
+* Everything is new. First release.

data/Manifest.txt ADDED Viewed

@@ -0,0 +1,13 @@
+HISTORY.md
+Manifest.txt
+NOTES.md
+README.md
+Rakefile
+lib/alphabets.rb
+lib/alphabets/alphabets.rb
+lib/alphabets/variants.rb
+lib/alphabets/version.rb
+test/helper.rb
+test/test_downcase.rb
+test/test_unaccent.rb
+test/test_variants.rb

data/NOTES.md ADDED Viewed

@@ -0,0 +1,3 @@
+# Notes
+## Todos

data/README.md ADDED Viewed

@@ -0,0 +1,26 @@
+# alphabets -
+* home  :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
+* bugs  :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
+* gem   :: [rubygems.org/gems/alphabets](https://rubygems.org/gems/alphabets)
+* rdoc  :: [rubydoc.info/gems/alphabets](http://rubydoc.info/gems/alphabets)
+* forum :: [opensport](http://groups.google.com/group/opensport)
+## Usage
+To be done
+## License
+The `alphabets` scripts are dedicated to the public domain.
+Use it as you please with no restrictions whatsoever.
+## Questions? Comments?
+Send them along to the
+[Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
+Thanks!

data/Rakefile ADDED Viewed

@@ -0,0 +1,28 @@
+require 'hoe'
+require './lib/alphabets/version.rb'
+Hoe.spec 'alphabets' do   ## declare the gem specification for the alphabets gem via Hoe
+  self.version = Alphabet::VERSION   ## version constant from ./lib/alphabets/version.rb
+  self.summary = "alphabets - "
+  self.description = summary   ## reuse the summary text set on the line before
+  self.urls = ['https://github.com/sportdb/sport.db']
+  self.author = 'Gerald Bauer'
+  self.email = 'opensport@googlegroups.com'
+  # switch extension to .markdown for github formatting
+  self.readme_file = 'README.md'
+  self.history_file = 'HISTORY.md'
+  self.licenses = ['Public Domain']
+  self.extra_deps = []   ## no extra (runtime) dependencies declared
+  self.spec_extras = {
+   :required_ruby_version => '>= 2.2.2'
+  }
+end

data/lib/alphabets.rb ADDED Viewed

@@ -0,0 +1,34 @@
+# encoding: utf-8
+require 'pp'
+###
+# our own code
+require 'alphabets/version' # let version always go first
+require 'alphabets/alphabets'
+require 'alphabets/variants'
+## add "global" convenience helpers - top-level methods that forward to Alphabet / Variant
+def downcase_i18n( name )   ## shortcut for Alphabet.downcase_i18n
+  Alphabet.downcase_i18n( name )
+end
+def unaccent( name )   ## shortcut for Alphabet.unaccent
+  Alphabet.unaccent( name )   ## using "default" language character mapping / table
+end
+def variants( name )    ## shortcut for Variant.find; todo/check: rename to unaccent_variants or unaccent_names - why? why not?
+  Variant.find( name )
+end
+## add convenience aliases (all three constants point to the very same Alphabet class) - also add Alpha - why? why not?
+Abc       = Alphabet
+Alphabets = Alphabet
+Alpha     = Alphabet
+puts Alphabet.banner   # say hello - note: prints the version banner every time the library gets required

data/lib/alphabets/alphabets.rb ADDED Viewed

@@ -0,0 +1,186 @@
+# encoding: utf-8
+class Alphabet   ## todo/fix: add alias Abc  and Alpha too? why? why not?
+  def self.frequency_table( name )   ## todo/check: use/rename to char_frequency_table
+    ## calculate the frequency table of letters, digits, etc.
+    freq = Hash.new(0)
+    name.each_char do |ch|
+       freq[ch] += 1
+    end
+    freq
+  end
+  def self.count( freq, mapping_or_chars )
+    chars = if mapping_or_chars.is_a?( Hash )
+              mapping_or_chars.keys
+            else   ## todo/fix: check for is_a? Array and if is String split into Array (on char at a time?) - why? why not?
+              mapping_or_chars  ## assume it's an array/list of characters
+            end
+    chars.reduce(0) do |count,ch|
+      count += freq[ch]
+      count
+    end
+  end
+  def self.tr( name, mapping )
+    buf = String.new
+    name.each_char do |ch|
+      buf << if mapping[ch]
+                mapping[ch]
+              else
+                ch
+              end
+    end
+    buf
+  end
+  class Unaccenter #Worker    ## todo/change - find a better name - why? why not?
+    def initialize( mapping )
+      @mapping = mapping
+    end
+    def count( name )      Alphabet.count( name, @mapping ); end
+    def unaccent( name )   Alphabet.tr( name, @mapping );    end
+  end  # class Unaccent Worker
+  def self.find_unaccenter( key )
+    if key == :de
+      @de ||= Unaccenter.new( UNACCENT_DE )
+      @de
+    else
+      ## use uni(versal) or unicode or something - why? why not?
+      ##  use all or int'l (international) - why? why not?
+      ##  use en  (english) - why? why not?
+      @default ||= Unaccenter.new( UNACCENT )
+      @default
+    end
+  end
+  def self.unaccent( name )
+    @default ||= Unaccenter.new( UNACCENT )
+    @default.unaccent( name )
+  end
+  def self.downcase_i18n( name )    ## our very own downcase for int'l characters / letters
+    tr( name, DOWNCASE )
+  end
+  ## add downcase_uni  - univeral/unicode - why? why not?
+  ##  "simple" unaccent (remove accents/diacritics and unfold ligatures) translation table / mapping
+  UNACCENT = {
+    'Ä'=>'A',  'ä'=>'a',
+    'Á'=>'A',  'á'=>'a',
+               'à'=>'a',
+               'ã'=>'a',
+               'â'=>'a',
+    'Å'=>'A',  'å'=>'a',
+               'æ'=>'ae',
+               'ā'=>'a',
+               'ă'=>'a',
+               'ą'=>'a',
+    'Ç' =>'C', 'ç'=>'c',
+               'ć'=>'c',
+    'Č'=>'C',  'č'=>'c',
+    'É'=>'E',  'é'=>'e',
+               'è'=>'e',
+               'ê'=>'e',
+               'ë'=>'e',
+               'ė'=>'e',
+               'ę'=>'e',
+               'ğ'=>'g',
+    'İ'=>'I',
+    'Í'=>'I',  'í'=>'i',
+               'î'=>'i',
+               'ī'=>'i',
+               'ı'=>'i',
+    'Ł'=>'L', 'ł'=>'l',
+               'ñ'=>'n',
+               'ń'=>'n',
+               'ň'=>'n',
+    'Ö'=>'O',  'ö'=>'o',
+               'ó'=>'o',
+               'õ'=>'o',
+               'ô'=>'o',
+               'ø'=>'o',
+               'ő'=>'o',
+                'ř'=>'r',
+    'Ś'=>'S',
+    'Ş'=>'S',  'ş'=>'s',
+    'Š'=>'S',  'š'=>'s',
+               'ș'=>'s',  ## U+0219
+               'ß'=>'ss',
+               'ţ'=>'t',  ## U+0163
+               'ț'=>'t',  ## U+021B
+               'þ'=>'th',   #### fix!!!! use p - why? why not?
+    'Ü'=>'U',  'ü'=>'u',
+    'Ú'=>'U',  'ú'=>'u',
+               'ū'=>'u',
+               'ý'=>'y',
+               'ź'=>'z',
+               'ż'=>'z',
+    'Ž'=>'Z',  'ž'=>'z',
+  }
+  ##  de,at,ch translation for umlauts
+  UNACCENT_DE = {
+    'Ä'=>'Ae',  'ä'=>'ae',  ### Use AE, OE, UE and NOT Ae, Oe, Ue - why? why not? e.g.VÖST => VOEST or Ö => OE
+    'Ö'=>'Oe',  'ö'=>'oe',
+    'Ü'=>'Ue',  'ü'=>'ue',
+                'ß'=>'ss',
+  }
+  ## add UNACCENT_ES - why? why not?  is Espanyol catalan spelling or spanish (castillian)?
+  # 'ñ'=>'ny',    ## e.g. Español => Espanyol
+  DOWNCASE = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].reduce({}) do |h,ch|
+    h[ch] = ch.downcase
+    h
+  end.merge(
+    'Ä'=>'ä',
+    'Á'=>'á',
+    'Å'=>'å',
+    'Ç'=>'ç',
+    'Č'=>'č',
+    'É'=>'é',
+    'İ'=>'?',   ## fix - add lowercase
+    'Í'=>'í',
+    'Ł'=>'ł',
+    'Ö'=>'ö',
+    'Ś'=>'?',   ## fix - add lowercase
+    'Ş'=>'ş',
+    'Š'=>'š',
+    'Ü'=>'ü',
+    'Ú'=>'ú',
+    'Ž'=>'ž',
+  )
+end  # class Alphabet

data/lib/alphabets/variants.rb ADDED Viewed

@@ -0,0 +1,72 @@
+# encoding: utf-8
+class Variant    ## (spelling) variant finder / builder for names
+  EN_UNACCENTER = Alphabet.find_unaccenter( :en ) ## assume english (en) as default for know - change to universal/int'l/default or something - why? why not?
+  DE_UNACCENTER = Alphabet.find_unaccenter( :de )
+def self.find( name )
+  alt_names = []
+  freq = Alphabet.frequency_table( name )
+  en = EN_UNACCENTER
+  if en.count( freq ) > 0    # check if includes äöü (that is, character with accents or diacritics) etc.
+    alt_names <<  en.unaccent( name )
+  end
+  de = DE_UNACCENTER
+  if de.count( freq ) > 0
+    alt_names <<  de.unaccent( name )
+  end
+  ## todo - make uniq  e.g. Preußen is Preussen, Preussen 2x
+  alt_names = alt_names.uniq
+  alt_names
+end
+end  # class Variant
+######################################
+#  experimental class - use (just) Name or NameQ or NameVariant or NameAnalyzer/Query or similar - why? why not?
+##   let's wait for now with usage - let's add more methods as we go along and find more - why? why not?
+class NameQuery
+  def initialize( name )
+    @name = name
+  end
+  def frequency_table
+    @freq ||= Alphabet.frequency_table( @name )
+  end
+  def variants
+    @variants ||= find_variants
+  end
+private
+  EN_UNACCENTER = Alphabet.find_unaccenter( :en ) ## assume english (en) as default for know - change to universal/int'l/default or something - why? why not?
+  DE_UNACCENTER = Alphabet.find_unaccenter( :de )
+  def find_variants
+    alt_names = []
+    freq = frequency_table
+    en = EN_UNACCENTER
+    if en.count( freq ) > 0    # check if includes äöü (that is, character with accents or diacritics) etc.
+      alt_names <<  en.unaccent( @name )
+    end
+    de = DE_UNACCENTER
+    if de.count( freq ) > 0
+      alt_names <<  de.unaccent( @name )
+    end
+    ## todo - make uniq  e.g. Preußen is Preussen, Preussen 2x
+    alt_names = alt_names.uniq
+    alt_names
+  end
+end  ## class VariantName

data/lib/alphabets/version.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# encoding: utf-8
+## todo/check: use a module Alphabets with s to keep version and banner separate - why? why not?
+class Alphabet
+  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
+  MINOR = 0
+  PATCH = 1
+  VERSION = [MAJOR,MINOR,PATCH].join('.')
+  def self.version
+    VERSION
+  end
+  def self.banner
+    "alphabets/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
+  end
+  def self.root
+    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
+  end
+end # class Alphabet

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,10 @@
+## $:.unshift(File.dirname(__FILE__))
+## minitest setup
+require 'minitest/autorun'
+## our own code
+require 'alphabets'

data/test/test_downcase.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_downcase.rb
+require 'helper'
+class TestDowncase < MiniTest::Test
+  def test_downcase_i18n
+    assert_equal 'abcdefghijklmnopqrstuvwxyz',  downcase_i18n( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' )
+    assert_equal 'äöü',  downcase_i18n( 'ÄÖÜ' )
+    assert_equal 'köln', downcase_i18n( 'KÖLN' )
+  end
+end # class TestDowncase

data/test/test_unaccent.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_unaccent.rb
+require 'helper'
+class TestUnaccent < MiniTest::Test
+  def test_de
+    assert_equal 'Augsburg',             unaccent( 'Augsburg' )
+    assert_equal 'Koln',                 unaccent( 'Köln' )
+    assert_equal '1. FC Koln',           unaccent( '1. FC Köln' )
+    assert_equal 'Bayern Munchen',       unaccent( 'Bayern München' )
+    assert_equal 'F. Dusseldorf',        unaccent( 'F. Düsseldorf' )
+    assert_equal 'Preussen',             unaccent( 'Preußen' )
+    assert_equal 'Munster Preussen',     unaccent( 'Münster Preußen' )
+    assert_equal 'Rot-Weiss Oberhausen', unaccent( 'Rot-Weiß Oberhausen' )
+    assert_equal 'St. Polten',           unaccent( 'St. Pölten' )
+  end
+  def test_es
+    assert_equal 'Madrid',               unaccent( 'Madrid' )
+    assert_equal 'Atletico Madrid',      unaccent( 'Atlético Madrid' )
+    assert_equal 'Ecija Balompie',       unaccent( 'Écija Balompié' )
+    assert_equal 'La Coruna',            unaccent( 'La Coruña' )
+    assert_equal 'Almeria',              unaccent( 'Almería' )
+  end
+end # class TestUnaccent

data/test/test_variants.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_variants.rb
+require 'helper'
+class TestVariants < MiniTest::Test
+  def test_de
+    assert_equal [],                             variants( 'Augsburg' )
+    assert_equal ['Koln', 'Koeln'],              variants( 'Köln' )
+    assert_equal ['1. FC Koln', '1. FC Koeln'],  variants( '1. FC Köln' )
+    assert_equal ['Bayern Munchen', 'Bayern Muenchen'], variants( 'Bayern München' )
+    assert_equal ['F. Dusseldorf', 'F. Duesseldorf'],   variants( 'F. Düsseldorf' )
+    assert_equal ['Preussen'], variants( 'Preußen' )
+    assert_equal ['Munster Preussen', 'Muenster Preussen'], variants( 'Münster Preußen' )
+    assert_equal ['Rot-Weiss Oberhausen'], variants( 'Rot-Weiß Oberhausen' )
+    assert_equal ['St. Polten', 'St. Poelten'], variants( 'St. Pölten' )
+  end
+  def test_es
+    assert_equal [],                   variants( 'Madrid' )
+    assert_equal ['Atletico Madrid'],  variants( 'Atlético Madrid' )
+    assert_equal ['Ecija Balompie'],   variants( 'Écija Balompié' )
+    assert_equal ['La Coruna'],        variants( 'La Coruña' )
+    assert_equal ['Almeria'],          variants( 'Almería' )
+  end
+end # class TestVariants

metadata ADDED Viewed

@@ -0,0 +1,90 @@
+--- !ruby/object:Gem::Specification
+name: alphabets
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+platform: ruby
+authors:
+- Gerald Bauer
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2019-08-13 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.0'
+- !ruby/object:Gem::Dependency
+  name: hoe
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.16'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.16'
+description: 'alphabets - '
+email: opensport@googlegroups.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- HISTORY.md
+- Manifest.txt
+- NOTES.md
+- README.md
+files:
+- HISTORY.md
+- Manifest.txt
+- NOTES.md
+- README.md
+- Rakefile
+- lib/alphabets.rb
+- lib/alphabets/alphabets.rb
+- lib/alphabets/variants.rb
+- lib/alphabets/version.rb
+- test/helper.rb
+- test/test_downcase.rb
+- test/test_unaccent.rb
+- test/test_variants.rb
+homepage: https://github.com/sportdb/sport.db
+licenses:
+- Public Domain
+metadata: {}
+post_install_message:
+rdoc_options:
+- "--main"
+- README.md
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 2.2.2
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.5.2
+signing_key:
+specification_version: 4
+summary: alphabets -
+test_files: []