RubyGems - textutils - Versions diffs - 0.2.0 → 0.3.0 - Mend

textutils 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

data/Manifest.txt +6 -0
data/Rakefile +12 -1
data/lib/textutils.rb +10 -4
data/lib/textutils/reader/code_reader.rb +34 -0
data/lib/textutils/reader/hash_reader.rb +109 -0
data/lib/textutils/reader/line_reader.rb +90 -0
data/lib/textutils/reader/values_reader.rb +175 -0
data/lib/textutils/utils.rb +71 -0
data/lib/textutils/version.rb +7 -0
metadata +40 -16

data/Manifest.txt CHANGED

@@ -7,3 +7,9 @@ lib/textutils/filter/code_filter.rb
 lib/textutils/filter/comment_filter.rb
 lib/textutils/filter/erb_django_filter.rb
 lib/textutils/filter/erb_filter.rb
+lib/textutils/reader/code_reader.rb
+lib/textutils/reader/hash_reader.rb
+lib/textutils/reader/line_reader.rb
+lib/textutils/reader/values_reader.rb
+lib/textutils/utils.rb
+lib/textutils/version.rb

data/Rakefile CHANGED

@@ -1,5 +1,5 @@
 require 'hoe'
-require './lib/textutils.rb'
+require './lib/textutils/version.rb'
 Hoe.spec 'textutils' do
@@ -16,5 +16,16 @@ Hoe.spec 'textutils' do
   # switch extension to .markdown for gihub formatting
   self.readme_file  = 'README.markdown'
   self.history_file = 'History.markdown'
+  self.extra_deps = [
+    ['logutils', '~> 0.2.0']
+  ]
+  self.licenses = ['Public Domain']
+  self.spec_extras = {
+   :required_ruby_version => '>= 1.9.2'
+  }
 end

data/lib/textutils.rb CHANGED

@@ -8,17 +8,23 @@ require 'optparse'
 require 'fileutils'
 require 'erb'
+# 3rd party gems / libs
+require 'logutils'
 # our own code
+require 'textutils/version'
 require 'textutils/filter/code_filter'
 require 'textutils/filter/comment_filter'
 require 'textutils/filter/erb_django_filter'
 require 'textutils/filter/erb_filter'
+require 'textutils/utils'
+require 'textutils/reader/code_reader'
+require 'textutils/reader/hash_reader'
+require 'textutils/reader/line_reader'
+require 'textutils/reader/values_reader'
-module TextUtils
-  VERSION = '0.2.0'
-end   # module TextUtils

data/lib/textutils/reader/code_reader.rb ADDED

@@ -0,0 +1,34 @@
+# encoding: utf-8
+class CodeReader
+  def initialize( logger=nil, path )
+    if logger.nil?
+      @logger = Logger.new(STDOUT)
+      @logger.level = Logger::INFO
+    else
+      @logger = logger
+    end
+    @path  = path
+    ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
+    ## - see worlddb/utils.rb
+    @code  = File.read_utf8( @path )
+  end
+  def eval( klass )
+    klass.class_eval( @code )
+    # NB: same as
+    #
+    # module WorldDB
+    #   include WorldDB::Models
+    #  <code here>
+    # end
+  end
+  attr_reader :logger
+end # class CodeReader

data/lib/textutils/reader/hash_reader.rb ADDED

@@ -0,0 +1,109 @@
+# encoding: utf-8
+class HashReader
+  def initialize( logger=nil, path )
+    if logger.nil?
+      @logger = Logger.new(STDOUT)
+      @logger.level = Logger::INFO
+    else
+      @logger = logger
+    end
+    @path = path
+    ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
+    ## - see worlddb/utils.rb
+    text = File.read_utf8( @path )
+    ### hack for syck yaml parser (e.g.ruby 1.9.2) (cannot handle !!null)
+    ##   change it to !null to get plain nil
+    ##   w/ both syck and psych/libyml
+    text = text.gsub( '!!null', '!null' )
+    ### hacks for yaml
+    ### see yaml gotschas
+    ##  - http://www.perlmonks.org/?node_id=738671
+    ##  -
+    ## replace all tabs w/ two spaces and issue a warning
+    ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html
+    text = text.gsub( "\t" ) do |_|
+      puts "*** warn: hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html"
+      '  '  # replace w/ two spaces
+    end
+    ## quote implicit boolean types on,no,n,y
+    ## nb: escape only if key e.g. no: or "free standing" value on its own line e.g.
+    ##   no: no
+    text = text.gsub( /^([ ]*)(ON|On|on|NO|No|no|N|n|Y|y)[ ]*:/ ) do |value|
+      puts "*** warn: hash reader - found implicit bool (#{$1}#{$2}) for key; adding quotes to turn into string; see yaml.org/refcard.html"
+      # nb: preserve leading spaces for structure - might be significant
+      "#{$1}'#{$2}':"  # add quotes to turn it into a string (not bool e.g. true|false)
+    end
+    ## nb: value must be freestanding (only allow optional eol comment)
+    ##  do not escape if part of string sequence e.g.
+    ##  key: nb,nn,no,se   => nb,nn,'no',se  -- avoid!!
+    text = text.gsub( /:[ ]+(ON|On|on|NO|No|no|N|n|Y|y)[ ]*($| #.*$)/ ) do |value|
+      puts "*** warn: hash reader - found implicit bool (#{$1}) for value; adding quotes to turn into string; see yaml.org/refcard.html"
+      ": '#{$1}'"  # add quotes to turn it into a string (not bool e.g. true|false)
+    end
+    @hash = YAML.load( text )
+  end
+  attr_reader :logger
+  ###
+  # nb: returns all values as strings
+  #
+  def each
+    @hash.each do |key_wild, value_wild|
+      # normalize
+      # - key n value as string (not symbols, bool? int? array?)
+      # - remove leading and trailing whitespace
+      key   = key_wild.to_s.strip
+      value = value_wild.to_s.strip
+      puts "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"
+      yield( key, value )
+    end
+  end # method each
+  ###
+  # todo: what name to use: each_object or each_typed ???
+  #   or use new TypedHashReader class or similar??
+  def each_typed
+    @hash.each do |key_wild, value_wild|
+      # normalize
+      # - key n value as string (not symbols, bool? int? array?)
+      # - remove leading and trailing whitespace
+      key   = key_wild.to_s.strip
+      if value_wild.is_a?( String )
+        value = value_wild.strip
+      else
+        value = value_wild
+      end
+      puts "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"
+      yield( key, value )
+    end
+  end # method each
+end # class HashReader

data/lib/textutils/reader/line_reader.rb ADDED

@@ -0,0 +1,90 @@
+# encoding: utf-8
+##
+## fix/todo: move to/merge into LineReader itself
+#   e.g. use  fromString c'tor ??? or similar??
+class StringLineReader
+  def initialize( logger=nil, data )
+    if logger.nil?
+      @logger = Logger.new(STDOUT)
+      @logger.level = Logger::INFO
+    else
+      @logger = logger
+    end
+    @data = data
+  end
+  attr_reader :logger
+  def each_line
+    @data.each_line do |line|
+      if line =~ /^\s*#/
+        # skip komments and do NOT copy to result (keep comments secret!)
+        logger.debug 'skipping comment line'
+        next
+      end
+      if line =~ /^\s*$/
+        # kommentar oder leerzeile überspringen
+        logger.debug 'skipping blank line'
+        next
+      end
+      # remove leading and trailing whitespace
+      line = line.strip
+      yield( line )
+    end # each lines
+  end # method each_line
+end
+class LineReader
+  def initialize( logger=nil, path )
+    if logger.nil?
+      @logger = Logger.new(STDOUT)
+      @logger.level = Logger::INFO
+    else
+      @logger = logger
+    end
+    @path = path
+    ## nb: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
+    ## - see worlddb/utils.rb
+    @data = File.read_utf8( @path )
+  end
+  attr_reader :logger
+  def each_line
+    @data.each_line do |line|
+      if line =~ /^\s*#/
+        # skip komments and do NOT copy to result (keep comments secret!)
+        logger.debug 'skipping comment line'
+        next
+      end
+      if line =~ /^\s*$/
+        # kommentar oder leerzeile überspringen
+        logger.debug 'skipping blank line'
+        next
+      end
+      # remove leading and trailing whitespace
+      line = line.strip
+      yield( line )
+    end # each lines
+  end # method each_line
+end # class LineReader

data/lib/textutils/reader/values_reader.rb ADDED

@@ -0,0 +1,175 @@
+# encoding: utf-8
+class ValuesReader
+  def initialize( logger, path, more_values={} )
+    ## todo: check - can we make logger=nil a default arg too?
+    if logger.nil?
+      @logger = Logger.new(STDOUT)
+      @logger.level = Logger::INFO
+    else
+      @logger = logger
+    end
+    @path = path
+    @more_values = more_values
+    @data = File.read_utf8( @path )
+  end
+  attr_reader :logger
+  def each_line
+    @data.each_line do |line|
+      if line =~ /^\s*#/
+        # skip komments and do NOT copy to result (keep comments secret!)
+        logger.debug 'skipping comment line'
+        next
+      end
+      if line =~ /^\s*$/
+        # kommentar oder leerzeile überspringen
+        logger.debug 'skipping blank line'
+        next
+      end
+      # pass 1) remove possible trailing eol comment
+      ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
+      ## becomes -> nyc, New York
+      line = line.sub( /\s+#.+$/, '' )
+      # pass 2) remove leading and trailing whitespace
+      line = line.strip
+      puts "line: >>#{line}<<"
+      values = line.split(',')
+      # pass 1) remove leading and trailing whitespace for values
+      values = values.map { |value| value.strip }
+      ##### todo remove support of comment column? (NB: must NOT include commas)
+      # pass 2) remove comment columns
+      values = values.select do |value|
+        if value =~ /^#/  ## start with # treat it as a comment column; e.g. remove it
+          puts "   removing column with value >>#{value}<<"
+          false
+        else
+          true
+        end
+      end
+      puts "  values: >>#{values.join('<< >>')}<<"
+      ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
+      ##  either use keys or do NOT use keys; do NOT mix in a single fixture file
+      ### support autogenerate key from first title value
+      # if it looks like a key (only a-z lower case allowed); assume it's a key
+      #   - also allow . in keys e.g. world.quali.america, at.cup, etc.
+      if values[0] =~ /^[a-z][a-z.]*[a-z]$/    # NB: minimum two a-z letters required
+        key_col         = values[0]
+        title_col       = values[1]
+        more_cols       = values[2..-1]
+      else
+        key_col         = '<auto>'
+        title_col       = values[0]
+        more_cols       = values[1..-1]
+      end
+      attribs = {}
+      ## title (split of optional synonyms)
+      # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
+      titles = title_col.split('|')
+      attribs[ :title ]    =  titles[0]
+      ## add optional synonyms if present
+      attribs[ :synonyms ] =  titles[1..-1].join('|')  if titles.size > 1
+      if key_col == '<auto>'
+        ## autogenerate key from first title
+        key_col = title_to_key( titles[0] )
+        puts "   autogen key >#{key_col}< from title >#{titles[0]}<"
+      end
+      attribs[ :key ] = key_col
+      attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present
+      yield( attribs, more_cols )
+    end # each lines
+  end # method each_line
+  def title_to_key( title )
+      ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
+      key = title.downcase
+      ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
+      key = key.gsub( /\[.+\]/, '' )
+      ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
+      key = key.gsub( /\(.+\)/, '' )
+      ## remove all whitespace and punctuation
+      key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
+      ##  turn accented char into ascii look alike if possible
+      ##
+      ## todo: add some more
+      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
+      alternatives = [
+        ['ß', 'ss'],
+        ['æ', 'ae'],
+        ['ä', 'ae'],
+        ['á', 'a' ],  # e.g. Bogotá, Králové
+        ['ã', 'a' ],  # e.g  São Paulo
+        ['ă', 'a' ],  # e.g. Chișinău
+        ['é', 'e' ],  # e.g. Vélez, Králové
+        ['è', 'e' ],  # e.g. Rivières
+        ['ê', 'e' ],  # e.g. Grêmio
+        ['ě', 'e' ],  # e.g. Budějovice
+        ['ì', 'i' ],  # e.g. Potosì
+        ['í', 'i' ],  # e.g. Ústí
+        ['ñ', 'n' ],  # e.g. Porteño
+        ['ň', 'n' ],  # e.g. Plzeň, Třeboň
+        ['ö', 'oe'],
+        ['ó', 'o' ],  # e.g. Colón, Łódź, Kraków
+        ['ř', 'r' ],  # e.g. Třeboň
+        ['ș', 's' ],  # e.g. Chișinău
+        ['ü', 'ue'],
+        ['ú', 'u' ],  # e.g. Fútbol
+        ['ź', 'z' ],  # e.g. Łódź
+        ['Č', 'c' ],  # e.g. České
+        ['Ł', 'l' ],  # e.g. Łódź
+        ['Ú', 'u' ],  # e.g. Ústí
+      ]
+      alternatives.each do |alt|
+        key = key.gsub( alt[0], alt[1] )
+      end
+      key
+  end # method title_to_key
+end # class ValuesReader

data/lib/textutils/utils.rb ADDED

@@ -0,0 +1,71 @@
+# encoding: utf-8
+class File
+  def self.read_utf8( path )
+    text = open( path, 'r:bom|utf-8' ) do |file|
+      file.read
+    end
+    ### for convenience for now convert fancy dash to "plain" dash
+    ## todo: check char codes for "fancy" dash alternatives
+    text.gsub!( /—|–/ ) do |_|
+      puts "*** warning: convert fancy dash to 'plain' dash in file >#{path}<"
+      ### exit 1   #### do NOT tolerate!! cleanup dashes for now
+      '-'
+    end
+    text
+  end
+end # class File
+  ############
+  ### fix/todo: share helper for all text readers/parsers- where to put it?
+  ###
+  def title_esc_regex( title_unescaped )
+      ##  escape regex special chars e.g. . to \. and ( to \( etc.
+      # e.g. Benfica Lis.
+      # e.g. Club Atlético Colón (Santa Fe)
+      ## NB: cannot use Regexp.escape! will escape space '' to '\ '
+      ## title = Regexp.escape( title_unescaped )
+      title = title_unescaped.gsub( '.', '\.' )
+      title = title.gsub( '(', '\(' )
+      title = title.gsub( ')', '\)' )
+      ##  match accented char with or without accents
+      ##  add (ü|ue) etc.
+      ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
+      ## todo: add some more
+      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
+      ##
+      ##  reuse for all readers!
+      alternatives = [
+        ['-', '(-| )'],  ## e.g. Blau-Weiß Linz
+        ['æ', '(æ|ae)'],  ## e.g.
+        ['á', '(á|a)'],  ## e.g. Bogotá, Sársfield
+        ['ã', '(ã|a)'],  ## e.g  São Paulo
+        ['ä', '(ä|ae)'],  ## e.g.
+        ['ç', '(ç|c)'],  ## e.g. Fenerbahçe
+        ['é', '(é|e)'],  ## e.g. Vélez
+        ['ê', '(ê|e)'],  ## e.g. Grêmio
+        ['ñ', '(ñ|n)'],  ## e.g. Porteño
+        ['ň', '(ň|n)'],  ## e.g. Plzeň
+        ['Ö', '(Ö|Oe)'], ## e.g. Österreich
+        ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
+        ['ó', '(ó|o)'],   ## e.g. Colón
+        ['ș', '(ș|s)'],   ## e.g. Bucarești
+        ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
+        ['ü', '(ü|ue)'],  ## e.g.
+        ['ú', '(ú|u)']  ## e.g. Fútbol
+      ]
+      alternatives.each do |alt|
+        title = title.gsub( alt[0], alt[1] )
+      end
+      title
+  end

data/lib/textutils/version.rb ADDED

@@ -0,0 +1,7 @@
+module TextUtils
+  VERSION = '0.3.0'
+end   # module TextUtils

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: textutils
 version: !ruby/object:Gem::Version
-  hash: 23
+  hash: 19
   prerelease:
   segments:
   - 0
-  - 2
+  - 3
   - 0
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Gerald Bauer
@@ -15,12 +15,28 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-06-09 00:00:00 Z
+date: 2013-02-20 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: rdoc
+  name: logutils
   prerelease: false
   requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        hash: 23
+        segments:
+        - 0
+        - 2
+        - 0
+        version: 0.2.0
+  type: :runtime
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  prerelease: false
+  requirement: &id002 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -31,22 +47,22 @@ dependencies:
         - 10
         version: "3.10"
   type: :development
-  version_requirements: *id001
+  version_requirements: *id002
 - !ruby/object:Gem::Dependency
   name: hoe
   prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
+  requirement: &id003 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        hash: 7
+        hash: 1
         segments:
         - 3
-        - 0
-        version: "3.0"
+        - 3
+        version: "3.3"
   type: :development
-  version_requirements: *id002
+  version_requirements: *id003
 description: textutils - Text Filters and Helpers
 email: webslideshow@googlegroups.com
 executables: []
@@ -65,9 +81,15 @@ files:
 - lib/textutils/filter/comment_filter.rb
 - lib/textutils/filter/erb_django_filter.rb
 - lib/textutils/filter/erb_filter.rb
+- lib/textutils/reader/code_reader.rb
+- lib/textutils/reader/hash_reader.rb
+- lib/textutils/reader/line_reader.rb
+- lib/textutils/reader/values_reader.rb
+- lib/textutils/utils.rb
+- lib/textutils/version.rb
 homepage: http://geraldb.github.com/textutils
-licenses: []
+licenses:
+- Public Domain
 post_install_message:
 rdoc_options:
 - --main
@@ -79,10 +101,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3
+      hash: 55
       segments:
-      - 0
-      version: "0"
+      - 1
+      - 9
+      - 2
+      version: 1.9.2
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: