RubyGems - factbook - Versions diffs - 1.1.0 → 1.1.1 - Mend

factbook 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

checksums.yaml +4 -4
data/Manifest.txt +18 -0
data/README.md +7 -0
data/data/attributes.yml +337 -0
data/data/categories.csv +1 -1
data/lib/factbook.rb +29 -14
data/lib/factbook/almanac.rb +72 -0
data/lib/factbook/attributes.rb +74 -0
data/lib/factbook/builder.rb +2 -2
data/lib/factbook/builder_item.rb +7 -8
data/lib/factbook/builder_json.rb +79 -0
data/lib/factbook/counter.rb +48 -0
data/lib/factbook/normalize.rb +43 -0
data/lib/factbook/page.rb +37 -45
data/lib/factbook/page_info.rb +12 -0
data/lib/factbook/reader_json.rb +51 -0
data/lib/factbook/sanitizer.rb +0 -7
data/lib/factbook/version.rb +1 -1
data/script/almanac.rb +48 -0
data/script/attributes.rb +34 -0
data/script/build.rb +28 -0
data/script/counter.rb +145 -0
data/script/json.rb +18 -0
data/test/data/json/au.json +892 -0
data/test/test_attribs.rb +33 -2
data/test/test_attribs_def.rb +20 -0
data/test/test_counter.rb +31 -0
data/test/test_json_builder.rb +25 -0
data/test/test_normalize.rb +23 -0
metadata +20 -2

data/lib/factbook/almanac.rb ADDED

@@ -0,0 +1,72 @@
+# encoding: utf-8
+module Factbook
+class Almanac   ## renders a collection of pages through one ERB template into a single text buffer
+  ## convenience helper ("factory") -- reads the pages for codes via JsonPageReader (from json_dir) and wraps them
+  def self.from_json( codes, json_dir: '.' )
+    pages = JsonPageReader.new( json_dir ).read_pages( codes )
+    self.new( pages )
+  end
+  def initialize( pages )   ## pages: collection that render walks with each (and counts)
+    @pages = pages
+  end
+  def render( template )   ## returns one string: every page rendered via PageCtx, concatenated in page order
+    buf = ''
+    @pages.each do |page|
+      text = PageCtx.new( page, template ).render
+      puts text     ## for debugging write country profile to console (too) -- NOTE(review): printed on every render call
+      buf << text
+    end
+    puts "count: #{@pages.count}"
+    buf   ## return buffered almanac text
+  end
+class PageCtx   ## per-page template context; defined inside Almanac (before its closing end)
+  attr_accessor :page
+  def initialize(page, template)
+    @page     = page
+    @template = template
+  end
+  ##############################
+  ## add some "view helpers"
+  def name   ## display name; computed once and cached in @name
+    ##  -- calculate name (use long name if (short) name is not available e.g. none)
+    ##   e.g. Austria
+    if @name.nil?
+      @name = page.name
+      @name = page.name_long  if @name == 'none'
+    end
+    @name
+  end
+  def names( separator: ' • ' )   ## name plus local name (if different), joined with separator
+    ##  e.g. Austria • Österreich
+    if @names.nil?   ## the array is cached; separator is only applied by join below, on every call
+      if page.name_local.blank? || page.name_local == 'none' || page.name_local == name   ## NOTE(review): blank? is not core ruby - presumably supplied by a dependency; confirm
+        @names = [name]    ## no local (in its own non-english language) name
+      else
+        @names = [name, page.name_local]
+      end
+    end
+    @names.join( separator )
+  end
+  def render   ## runs the template with this object's binding, so the template can call page, name and names
+    ERB.new( @template).result( binding )
+  end
+end   ## PageCtx
+end ## Almanac
+end # module Factbook

data/lib/factbook/attributes.rb ADDED

@@ -0,0 +1,74 @@
+# encoding: utf-8
+module Factbook
+class Attributes   ## flat list of attribute definitions built from a nested (yaml) hash
+  Attribute = Struct.new( :name,      ## value of the 'name' key found in the definition hash
+                          :category,  ## e.g. Introduction, Geography, etc.
+                          :path,      ## note: is an array  e.g. ["Area - comparative"] or ["Area", "land"] etc.
+                        )
+  def self.from_yaml( path )   ## loads the yaml file at path; every top-level key is used as a category
+    h = YAML.load_file( path )
+    pp h   ## NOTE(review): debug dump of the whole hash on every load
+    attribs = []
+    ## note: use a copy (e.g. h.dup) for now (hash gets changed by build_attribs!!) -- NOTE(review): Hash#dup is shallow; the nested hashes handed to build_attribs are still the loaded objects, so they get changed anyway
+    new_h = h.dup
+    new_h.each do |k,v|
+      category = k
+      build_attribs( attribs, category, [], v )
+    end
+    self.new( attribs )
+  end
+  def self.build_attribs( attribs, category, path, h )   ## recursive walk; appends found attributes to attribs (in place)
+      ## assume it's an attribute definition hash
+      ##   note: !! exclude special cases:
+      ##      Capital           -- incl. name key itself
+      ##      National anthem
+     if h.has_key?( 'name' ) &&  ['Capital','National anthem'].include?( path[-1] ) == false   ## path[-1] is the key h was reached under (nil at category level)
+       a = Attribute.new
+       a.name     = h['name']
+       a.category = category
+       a.path     = path
+       puts "  adding attribute >#{a.name}< using #{a.category} / #{a.path.inspect}"
+       attribs << a
+       ## note: make sure a modifiable copy (of h) gets passed in -- the 'name' key is removed so it is not walked as a child below
+       h.delete( 'name' )
+     end
+     return  if h.empty?    ## empty hash; nothing (more) to do; return
+     ## continue walking (recursive) -- NOTE(review): every remaining value v is passed on as h, so it must respond to has_key?/empty?/each; a plain scalar value would raise here
+     h.each do |k,v|
+       new_path = path.dup << k   ## note: create a new array (copy)
+       build_attribs( attribs, category, new_path, v )
+    end
+  end
+  def initialize( attribs )   ## attribs: array of Attribute structs (kept as is, not copied)
+    @attribs = attribs
+  end
+  def to_a() @attribs; end   ## returns the internal array itself (no copy)
+  def size() @attribs.size; end
+  def each   ## yields every attribute in order; note: Enumerable is not included in this class
+    @attribs.each { |attrib| yield( attrib ) }
+  end
+end  # class Attributes
+end # module Factbook

data/lib/factbook/builder.rb CHANGED

@@ -29,7 +29,7 @@ end
 attr_reader :html_ascii,     ## full "original" 1:1 page in "original/ascii8/binary" encoding
             :html,           ## utf-8 encoded profile
             :html_debug,     ## html w/ mapping markers - rename to html_markers - why? why not?
-            :page_info,      ## incl. country_name, region_name, last_updated etc.
+            :info,            ## page info incl. country_name, region_name, last_updated etc.
             :errors,          ## encoding erros etc.
             :sects
@@ -38,7 +38,7 @@ def initialize( html_ascii )
   @html_ascii = html_ascii
   ## todo/fix: use/assume windows 12xx?? encoding - change encoding to utf-8  (from binary/ascii8bit)
-  @html, @page_info, @errors = Sanitizer.new.sanitize( @html_ascii )
+  @html, @info, @errors = Sanitizer.new.sanitize( @html_ascii )
   @html_debug = map_sects( @html )
   @html_debug = map_subsects( @html_debug )

data/lib/factbook/builder_item.rb CHANGED

@@ -4,6 +4,7 @@ module Factbook
 class ItemBuilder       ## renameto ItemReader, ItemParser - why? why not??
   include LogUtils::Logging
+  include NormalizeHelper    ##  e.g. normalize_category
 def initialize( html, name )
   @html = html
@@ -42,7 +43,7 @@ def read
             last_node['text'] += " #{text}"    ## append w/o separator
          end
       else
-        if @name == 'demographic_profile' || @name == 'Demographic profile'  ## special case (use space a sep)
+        if @name == 'Demographic profile'  ## special case (use space a sep)
             last_node['text'] += " #{text}"   ## append without (w/o) separator
         else
             last_node['text'] += " ++ #{text}"   ## append with ++ separator
@@ -60,14 +61,11 @@ def read
       ## pp spans
       span_key   = spans[0]  ## assume 1st entry is span.category
-      span_value = spans[1]  ## assume 2nd entry is span.category_data')
-      ## allow optional category_data for now
-      key   = span_key.text
-      key   = key.strip
-      key   = key.sub( /:\z/, '' )    # remove trailing : if present
-      key   = key.strip
+      span_value = spans[1]  ## assume 2nd entry is span.category_data
+      key   = normalize_category( span_key.text )
+      ## note: allow optional category_data for now
       value = span_value ? span_value.text : nil
       puts "key: >#{key}<, value: >#{value}< : #{value.class.name}"
@@ -87,6 +85,7 @@ def read
   pp data
   data
 end
 end # class ItemBuilder

data/lib/factbook/builder_json.rb ADDED

@@ -0,0 +1,79 @@
+# encoding: utf-8
+module Factbook
+######
+# json builder -- lets us rebuild a page from "dumped" json (instead of parsing html page)
+class JsonBuilder   ## builds sects (Sect / Subsect objects) from a json text instead of from html
+  include LogUtils::Logging
+  include NormalizeHelper    ##  e.g. normalize_category
+def self.from_file( path )   ## reads the file at path and delegates to from_string
+  text = File.read( path )     ## fix: use File.read_utf8  from textutils
+  self.from_string( text )
+end
+def self.from_string( text )   ## text: json as a string (parsed in initialize)
+  self.new( text )
+end
+attr_reader :text,            ## json text as passed in
+            :json,            ## result of JSON.parse( text )
+            :info,            ## not used yet -- page info incl. country_name, region_name, last_updated etc.
+            :errors,          ## not used yet -- encoding errors etc.
+            :sects            ## array of Sect objects; one per top-level json key
+def initialize( text )
+  @text = text
+  @json = JSON.parse( text )
+  @info   = nil   ## fix/todo: sorry - for now no page info (use header in json - why? why not??)
+  @errors = []       ## fix/todo: sorry - for now no errors possible/tracked
+  @sects = []
+  @json.each do |k1,v1|   ## level 1: section title => subsections (walked with each |k,v| below)
+    sect_title    = k1
+    sect_subsects = v1
+    sect = Sect.new
+    sect.title = sect_title
+    ## get subsections
+    subsects = []
+    sect_subsects.each do |k2,v2|   ## level 2: subsection title => data
+      subsect_title = k2
+      subsect_data  = v2
+      subsect = Subsect.new
+      subsect.title = subsect_title
+      #####
+      ## note: run data hash through normalize_category (again) -- only the keys of this first level get normalized; values are kept as is
+      if subsect_data.is_a?( Hash )
+        new_subsect_data = {}
+        subsect_data.each do |k3,v3|
+          new_subsect_data[ normalize_category(k3) ] = v3   ## NOTE(review): two keys that normalize to the same text overwrite each other (last one wins)
+        end
+        subsect_data = new_subsect_data
+      end
+      subsect.data  = subsect_data
+      subsects << subsect
+    end
+    sect.subsects = subsects
+    @sects << sect
+  end
+end
+end # class JsonBuilder
+end # module Factbook

data/lib/factbook/counter.rb ADDED

@@ -0,0 +1,48 @@
+# encoding: utf-8
+module Factbook
+class Counter   ## counts how often every (hash-valued) key shows up across the pages passed to count
+attr_reader :data   ## nested hash; every node holds :count (and :codes) next to its child nodes
+def initialize
+  @data = {}
+end
+def count( page )   ## adds one page to the running totals in @data
+  ## walk page data hash
+  #   add nodes to data
+  walk( page, page.data, @data )
+end
+private
+def walk( page, hin, hout )   ## hin: (sub)hash of the page data; hout: matching node in the totals
+   hin.each do |k,v|
+     if v.is_a? Hash   ## only hash values are counted; all other values are skipped
+        hout2 =  hout[k] || { count: 0, codes: '' }
+        hout2[ :count ] += 1
+        ## delete codes if larger (threshold) than x (e.g. 9) -- so at most 9 codes are ever kept; once deleted the key stays absent
+        hout2.delete( :codes )    if hout2[ :count ] > 9
+        codes = hout2[ :codes ]
+        if codes    ## note: might have been deleted if passed threshold (e.g. 9 entries)
+          codes << ' '  unless codes.empty?   ## add separator (space for now)
+          codes << page.info.country_code
+          hout2[ :codes ] = codes
+        end
+        hout[k] = hout2
+        walk( page, v, hout2 )   ## child nodes get stored in the same hash as :count / :codes
+     end
+   end
+end
+end # class Counter
+end # module Factbook

data/lib/factbook/normalize.rb ADDED

@@ -0,0 +1,43 @@
+# encoding: utf-8
+module Factbook
+  module NormalizeHelper   ## mixin; gets included by the builders (see include NormalizeHelper)
+def normalize_category( text )   ## returns the cleaned-up category key for text (text itself is not changed)
+  ## note: fix typos/errors with double colons e.g. note::  (instead of note:)
+  text = text.strip
+  text = text.sub( /:+\z/, '' )      # remove trailing : if present -- note: allow (fix) note:: too, thus, use :+
+  text = text.strip
+  #######################################
+  ### special cases -- all checks below are exact (case-sensitive) string matches, except the last one
+  ##   typos e.g ntoe => use note
+  text = 'note'                         if text == 'ntoe'
+  text = 'investment in fixed capital'  if text == 'investment if fixed capital'
+  ##  downcase
+  text = 'lowest point'    if text == 'Lowest point'
+  text = 'chief of state'  if text == 'Chief of state'
+  ##  spelling variant (use more popular one)
+  text = 'signed, but not ratified'     if text == 'signed but not ratified'
+  text = 'vectorborne diseases'         if text == 'vectorborne disease'
+  text = 'water contact disease'        if text == 'water contact diseases'   ## NOTE(review): plural => singular here; the two other disease rules go singular => plural - confirm that is intended
+  text = 'food or waterborne diseases'  if text == 'food or waterborne disease'
+  text = 'geographic coordinates'       if text == 'geographical coordinates'
+  text = 'note'                         if text == 'notes'
+  text = 'refugees (country of origin)' if text == 'refugees (countries of origin)'
+  ##    border countries (8):   -- remove (x) counter -- NOTE(review): prefix match; any other key starting with 'border countries' gets collapsed too
+  text = 'border countries'   if text.start_with?( 'border countries')
+  text
+end
+  end # module NormalizeHelper
+end  # module Factbook

data/lib/factbook/page.rb CHANGED

@@ -39,18 +39,31 @@ class Page
   def initialize( code, opts={} )
     ### keep code - why? why not??  (use page_info/info e.g. info.country_code??)
-    if opts[:html]    ## note: expects ASCII-7BIT/BINARY encoding
-       ## for debugging and testing allow "custom" passed-in html page
-      html = opts[:html]
-    else
-      url_string =  SITE_BASE.gsub( '{code}', code )
-      ## note: expects ASCII-7BIT/BINARY encoding
-      html = fetch_page( url_string )   ## use PageFetcher class - why?? why not??
+    if opts[:json]
+      json = opts[:json]    ## note: json is (still) a string/text (NOT yet parsed to structured data)
+      b = JsonBuilder.from_string( json )
+    else  ## assume html
+      if opts[:html]    ## note: expects ASCII-7BIT/BINARY encoding
+         ## for debugging and testing allow "custom" passed-in html page
+        html = opts[:html]
+      else
+        url_string =  SITE_BASE.gsub( '{code}', code )
+        ## note: expects ASCII-7BIT/BINARY encoding
+        html = fetch_page( url_string )   ## use PageFetcher class - why?? why not??
+      end
+      b = Builder.from_string( html )
     end
-    b = Builder.from_string( html )
     @sects = b.sects
-    @info  = b.page_info    ## todo: change b.page_info to info too - why? why not??
+    @info  = b.info
+    ## todo/fix/quick hack:
+    ##  check for info opts hash entry - lets you overwrite page info
+    ##  -- use proper header to setup page info - why, why not??
+    if opts[:info]
+      info  = opts[:info]
+      @info = info
+    end
     @data = {}
     @sects.each do |sect|
@@ -83,43 +96,22 @@ class Page
   end
   ## add convenience (shortcut) accessors / attributes / fields / getters
-  ATTRIBUTES = {
-   'Introduction' => [[:background, 'Background' ]],
-   'Geography'    => [[:area,             'Area', 'total'],    ## convert to number -- why? why not??
-                      [:area_land,        'Area', 'land' ],
-                      [:area_water,       'Area', 'water'],
-                      [:area_note,        'Area', 'note' ],
-                      [:area_comparative, 'Area - comparative'],
-                      [:climate,          'Climate'],
-                      [:terrain,          'Terrain'],
-                      [:elevation_lowest, 'Elevation extremes', 'lowest point'],
-                      [:elevation_highest,'Elevation extremes', 'highest point'],
-                      [:resources,        'Natural resources']],
-  'People and Society' => [[:languages,         'Languages' ],
-                           [:religions,         'Religions' ],
-                           [:population,        'Population' ],
-                           [:population_growth, 'Population growth rate' ],
-                           [:birth_rate,        'Birth rate' ],
-                           [:death_rate,        'Death rate' ],
-                           [:migration_rate,    'Net migration rate' ],
-                           [:major_cities,      'Major urban areas - population' ]],
-  }
-  ATTRIBUTES.each do |section_title, attribs|
-    attribs.each do |attrib|
-      ## e.g.
-      ##    def background()  data['Introduction']['Background']['text']; end
-      ##    def location()    data['Geography']['Location']['text'];      end
-      ##    etc.
-      if attrib.size == 2
-        define_method attrib[0] do
-          @data.fetch( section_title, {} ).fetch( attrib[1], {} )['text']
-        end
-      else  ## assume size 3 for now
-        define_method attrib[0] do
-          @data.fetch( section_title, {} ).fetch( attrib[1], {} ).fetch( attrib[2], {} )['text']
-        end
+  ATTRIBUTES.each do |attrib|
+    ## e.g.
+    ##    def background()  data['Introduction']['Background']['text']; end
+    ##    def location()    data['Geography']['Location']['text'];      end
+    ##    etc.
+    if attrib.path.size == 1
+      define_method attrib.name.to_sym do
+        @data.fetch( attrib.category, {} ).
+              fetch( attrib.path[0], {} )['text']
+      end
+    else  ## assume size 2 for now
+      define_method attrib.name.to_sym do
+        @data.fetch( attrib.category, {} ).
+              fetch( attrib.path[0], {} ).
+              fetch( attrib.path[1], {} )['text']
       end
     end
   end