csvhuman 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d8038f1824b8772b53f57f7dc00c67ca902fd872
4
- data.tar.gz: a95430def44f77428a32b4ceb80f12c96d078199
3
+ metadata.gz: eac10294bb4add6317df3fd1eb2dcab35f75315d
4
+ data.tar.gz: 2072e628e65873a95584fa70fec0bf11a66e2322
5
5
  SHA512:
6
- metadata.gz: a2ee778e98288478930b92cb066b8d8ee65da61dca3d911e3f3de12c5ee3a0d0f3451db5ea1426c1c59a5a8cd6fc9ecb2bb509d8982e79b1ff0ce8d8c353765e
7
- data.tar.gz: e02189132d596e2b5abc654f55c9dcf925ed96d0d5008b8b2eba956cdfc0044827c374409e2e22e45c95791ed5db256c441210a36d81172c53f3178007f8cc40
6
+ metadata.gz: 931ada0926ae767b7b2348c237694ec35222ff06dfdbac7298527744c0ab99444778d5031355b106ed148551646e301c12d2cc32df8b0dd7001e2b91003c259f
7
+ data.tar.gz: 0ba952f578bfafb466a99073792d9c0173b24bf689571ec140ed8e61a7ef00f7addea40979bb8ce495d1a8a1266ce01845f5e551653f02afb15c0ea516021bf3
@@ -2,20 +2,38 @@ HISTORY.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
+ config/attributes.csv
6
+ config/langs.csv
7
+ config/tags.csv
8
+ config/types.csv
9
+ config/versions.csv
5
10
  lib/csvhuman.rb
6
11
  lib/csvhuman/base.rb
7
12
  lib/csvhuman/column.rb
8
13
  lib/csvhuman/converter.rb
14
+ lib/csvhuman/doc/helper.rb
15
+ lib/csvhuman/doc/schema.rb
9
16
  lib/csvhuman/reader.rb
10
17
  lib/csvhuman/tag.rb
11
18
  lib/csvhuman/version.rb
19
+ test/data/airports.csv
20
+ test/data/ebola.csv
21
+ test/data/hdx/ebola_treatment_centres.csv
22
+ test/data/hdx/phl_haima_houses_damaged.csv
23
+ test/data/hdx/zika_cases.csv
12
24
  test/data/sample1.csv
13
25
  test/data/sample2.csv
14
26
  test/data/sample3.csv
15
27
  test/data/sample4.csv
16
28
  test/data/test.csv
29
+ test/data/unhcr.csv
17
30
  test/helper.rb
31
+ test/test_doc.rb
32
+ test/test_hdx.rb
18
33
  test/test_header_converter.rb
34
+ test/test_misc.rb
19
35
  test/test_reader.rb
20
36
  test/test_samples.rb
21
37
  test/test_tags.rb
38
+ test/test_type_converters.rb
39
+ test/test_type_mappings.rb
@@ -0,0 +1,54 @@
1
+ attribute,since,category,tags,description
2
+ adolescents,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adolescents, loosely defined (precise age range varies); may overlap +children and +adult. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adolescents +age12_17""."
3
+ adults,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adults, loosely defined (precise age range varies); may overlap +adolescents and +elderly. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adults +age18_64""."
4
+ children,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"The associated hashtag applies to non-adults, loosely defined (precise age range varies; may overlap +infants and +adolescents). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+children +age3_11""."
5
+ elderly,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Elderly people, loosely defined (precise age range varies). May overlap +adults. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+elderly +age65plus""."
6
+ f,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Female people. See also +m and +i.
7
+ hh,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Households (vs +ind for individual people). The exact definition of ""household"" may vary among aid organisations."
8
+ i,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Intersex or non-gender-binary people. Use this attribute for any groups who do not identify as male (+m) or female (+f).
9
+ ind,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Individual people (vs +hh for households).
10
+ infants,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Infant children, loosely defined (precise age range varies; may overlap +children). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+infants +age0_2""."
11
+ m,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Male people. See also +f and +i.
12
+ activity,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as an ""activity"" proper (may imply different hierarchical levels in different contexts)."
13
+ cluster,1.1,(2) Organisation and activity attributes,#sector,Identifies a sector as a formal IASC humanitarian cluster.
14
+ funder,1.0,(2) Organisation and activity attributes,#org,Funding org/agency (e.g. donor).
15
+ impl,1.0,(2) Organisation and activity attributes,#org,Implementing partner.
16
+ prog,1.0,(2) Organisation and activity attributes,#org,Programming org/agency.
17
+ programme,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""programme"" (may imply different hierarchical levels in different contexts)."
18
+ project,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""project"" (may imply different hierarchical levels in different contexts)."
19
+ provided,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that has been provided to people in need."
20
+ used,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that affected people have actually consumed or otherwise taken advantage of."
21
+ code,1.0,(3) Classification attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #country #crisis #currency #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"A unique, machine-readable code."
22
+ type,1.0,(3) Classification attributes,#access #activity #beneficiary #capacity #cause #contact #crisis #description #event #frequency #group #impact #indicator #item #loc #need #operations #org #output #sector #service #severity #subsector,"Types or categories. Use with #org, #loc, #indicator, etc to provide classification information."
23
+ bounds,1.0,(4) Geographical attributes,#geo,Boundary data (e.g. inline GeoJSON).
24
+ coord,1.0,(4) Geographical attributes,#geo,Geodetic coordinates (lat+lon together).
25
+ dest,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,Place of destination (intended or actual).
26
+ elevation,1.0,(4) Geographical attributes,#geo,Elevation above sea level (usually metres).
27
+ lat,1.0,(4) Geographical attributes,#geo,Latitude (decimal degrees preferred).
28
+ lon,1.0,(4) Geographical attributes,#geo,Longitude (decimal degrees preferred).
29
+ origin,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,"The data describes places of origin (intended or actual), e.g. the country of origin for displaced people."
30
+ approved,1.0,(5) Date attributes,#date,Date or time when something was approved.
31
+ canceled,1.0,(5) Date attributes,#date,Date or time when something (e.g. an #activity) was canceled.
32
+ converted,1.1,(5) Date attributes,#date,Date or time used for converting a monetary value to another currency.
33
+ end,1.0,(5) Date attributes,#date,Date or time when something finished or will finish.
34
+ occurred,1.0,(5) Date attributes,#date,Date or time when something took place.
35
+ reported,1.0,(5) Date attributes,#date,Date or time when the information was reported.
36
+ start,1.0,(5) Date attributes,#date,Date or time when something started or will start.
37
+ abducted,1.1,(6) Impact attributes,#affected #inneed #reached #targeted,Hashtag refers to people who have been abducted.
38
+ displaced,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Displaced people or households. Refers to all types of displacement: use +idps or +refugees to be more specific.
39
+ idps,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Internally-displaced people or households. More specific than +displaced.
40
+ incamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Located in camps.
41
+ infected,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People infected with a disease.
42
+ injured,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People injured.
43
+ killed,1.0,(6) Impact attributes,#affected,People killed.
44
+ noncamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Not located in camps.
45
+ refugees,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Refugee people or households. More specific than +displaced.
46
+ email,1.0,(7) General attributes,#contact,Email address.
47
+ id,1.1,(7) General attributes,#meta,Use with #meta to provide internal identifiers for data records.
48
+ label,1.0,(7) General attributes,,Text labels (for a table or chart).
49
+ name,1.0,(7) General attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #contact #country #crisis #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"Human-readable name, title, or label."
50
+ num,1.0,(7) General attributes,#capacity #indicator #output,"The data consists of quantitative, numeric information."
51
+ phone,1.0,(7) General attributes,#contact,The data consists of #contact phone numbers.
52
+ source,1.0,(7) General attributes,#meta,Information source for the data in the row or record.
53
+ text,1.0,(7) General attributes,#indicator,"The data consists of qualitative, narrative textual information."
54
+ url,1.0,(7) General attributes,#activity #contact #meta #org #service,"The data consists of web links related to the main hashtag (e.g. for an #org, #service, #activity, #loc, etc)."
@@ -0,0 +1,14 @@
1
+ code, name+en, name
2
+ ar, Arabic,
3
+ en, English, English
4
+ es, Spanish, Español
5
+ de, German, Deutsch
6
+ fa, Dari / Farsi / Persian,
7
+ fr, French,
8
+ ms, Malay,
9
+ ps, Pashto,
10
+ ru, Russian,
11
+ sw, Swahili,
12
+ tl, Tagalog,
13
+ uk, Ukrainian,
14
+ ur, Urdu,
@@ -0,0 +1,45 @@
1
+ tag,type,since,category,attributes,description
2
+ adm1,,1.0,(1) Places,+code +dest +name +origin,Top-level subnational administrative area (e.g. a governorate in Syria).
3
+ adm2,,1.0,(1) Places,+code +dest +name +origin,Second-level subnational administrative area (e.g. a subdivision in Bangladesh).
4
+ adm3,,1.0,(1) Places,+code +dest +name +origin,Third-level subnational administrative area (e.g. a subdistrict in Afghanistan).
5
+ adm4,,1.0,(1) Places,+code +dest +name +origin,Fourth-level subnational administrative area (e.g. a barangay in the Philippines).
6
+ adm5,,1.0,(1) Places,+code +dest +name +origin,Fifth-level subnational administrative area (e.g. a ward of a city).
7
+ country,,1.0,(1) Places,+code +dest +name +origin,Country (often left implied in a dataset). Also sometimes known as admin level 0.
8
+ geo,,1.0,(1) Places,+bounds +coord +elevation +lat +lon,"Geodetic geometry information (points, lines, shapes). Use for latitude and longitude, as well as bounds information."
9
+ loc,,1.0,(1) Places,+code +dest +name +origin +type,"Any general location, such as a village, camp, or clinic."
10
+ region,,1.0,(1) Places,+code +dest +name +origin,"A broad, supra- or cross-national geographical region (e.g. Sahel, Horn of Africa, Central Asia, Caribbean). Not to be confused with ""region"" used as the name of a subnational area (#adm1) in some countries."
11
+ affected,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +killed +m +noncamp +refugees,Number of people or households affected by an emergency. Subset of #population; superset of #inneed.
12
+ beneficiary,,1.0,(2) People and households,+code +name +type,"General (non-numeric) information about a person or group meant to benefit from aid activities, e.g. ""lactating women""."
13
+ inneed,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households in need of humanitarian assistance. Subset of #affected; superset of #targeted.
14
+ population,number,1.0,(2) People and households,+adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +m +noncamp +refugees,"General population number for an area or location, regardless of their specific humanitarian needs."
15
+ reached,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households reached with humanitarian assistance. Subset of #targeted.
16
+ respondee,,1.1,(2) People and households,,"Descriptive information, such as name, identifier, or traits, for a single respondee (person, household, etc.) in survey-style data."
17
+ targeted,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households targeted for humanitarian assistance. Subset of #inneed; superset of #reached.
18
+ access,,1.1,(3) Responses and other operations,+type,"Accessibility and constraints on access to a market, distribution point, facility, etc."
19
+ activity,,1.0,(3) Responses and other operations,+activity +code +name +programme +project +type +url,"A programme, project, or other activity. This hashtag applies to all levels; use the attributes +activity, +project, or +programme to distinguish different hierarchical levels."
20
+ capacity,,1.0,(3) Responses and other operations,+num +type,"The response capacity of the entity being described (e.g. ""25 beds"")."
21
+ contact,,1.0,(3) Responses and other operations,+email +name +phone +type +url,Contact information for the subject of a data record (e.g. an activity).
22
+ frequency,,1.1,(3) Responses and other operations,+type,The frequency with which something occurs.
23
+ indicator,,1.0,(3) Responses and other operations,+code +name +num +text +type,"A general hashtag for an indicator being tracked. See also #output, #capacity, #need, #impact, #severity, #affected, #inneed, #targeted, and #reached for more-specific indicator-related hashtags."
24
+ item,,1.1,(3) Responses and other operations,+code +name +provided +type +used,"Physical things provided, stored, shipped, available, used, etc."
25
+ need,,1.1,(3) Responses and other operations,+code +name +type,"A(n) (unfulfilled) need for an affected person, household, group, or population."
26
+ operations,,1.0,(3) Responses and other operations,+type,"Information that affects humanitarian operations, such as a restriction on movement or road closure."
27
+ org,,1.0,(3) Responses and other operations,+code +funder +impl +name +prog +type +url,"An organisation contributing to a humanitarian emergency response, e.g. a local government, community-based organisation, NGO, agency, donor, or law-enforcement or military unit. Use #group for organisations that are not part of the emergency response (e.g. a paramilitary group)."
28
+ output,,1.0,(3) Responses and other operations,+code +name +num +type,"An output indicator (e.g. ""number of water-purification kits distributed""). A more-specific alternative to #indicator, especially for 3W-style activity reports."
29
+ sector,,1.0,(3) Responses and other operations,+cluster +code +name +type,A humanitarian cluster or sector.
30
+ service,,1.1,(3) Responses and other operations,+code +name +provided +type +url +used,"A service used or needed by an affected person, household, group, or population."
31
+ subsector,,1.0,(3) Responses and other operations,+code +name +type,A humanitarian subsector.
32
+ channel,,1.1,(4) Cash and finance,+code +name,The detailed method of delivering aid (e.g. smartcard vs mobile transfer). More specific than #modality.
33
+ currency,,1.1,(4) Cash and finance,+code,"Name or ISO 4217 currency code for all financial #value cells in the row (e.g. ""EUR""). Typically used together with #value in financial or cash data."
34
+ modality,,1.1,(4) Cash and finance,+code +name,"The means by which an aid activity is accomplished. For cash transfers, values might include ""cash"", ""vouchers"", ""in-kind"", etc. May also be used for other types of modalities in other contexts."
35
+ value,number,1.1,(4) Cash and finance,,"A monetary value, such as the price of goods in a market, a project budget, or the amount of cash transferred to beneficiaries. May be used together with #currency in financial or cash data."
36
+ cause,,1.0,"(5) Crises, incidents, and events",+code +name +type,"The cause of an event, crisis, etc."
37
+ crisis,,1.0,"(5) Crises, incidents, and events",+code +name +type,A humanitarian emergency.
38
+ event,,1.0,"(5) Crises, incidents, and events",+code +name +type,"An individual event or incident within a crisis/emergency, such as a (localised) flood, bridge collapse, or conflict."
39
+ group,,1.0,"(5) Crises, incidents, and events",+code +name +type,"A non-humanitarian group (of any type) related to humanitarian crisis (e.g., a paramilitary group). Use #org instead for a humanitarian organisation such as an NGO, contributing to the humanitarian response."
40
+ impact,,1.0,"(5) Crises, incidents, and events",+code +name +type,The impact of a crisis on a group or other entity.
41
+ severity,,1.0,"(5) Crises, incidents, and events",+code +name +type,Severity of the crisis or event.
42
+ date,date,1.0,(6) Metadata,+approved +canceled +converted +end +occurred +reported +start,"Date to which the data in the record applies. Preferred format is ISO 8601 (e.g. ""2015-06-01"", ""2015-Q1"", etc.)"
43
+ description,,1.0,(6) Metadata,+type,Long description for a data record.
44
+ meta,,1.0,(6) Metadata,+id +source +url,Metadata about a row.
45
+ status,,1.0,(6) Metadata,+code +name,"Project/activity status description (such as ""planned"", ""active"", ""canceled"", or ""complete"")."
@@ -0,0 +1,7 @@
1
+ type,description
2
+ text,
3
+ number,
4
+ url,
5
+ email,
6
+ phone,
7
+ date,
@@ -0,0 +1,4 @@
1
+ version, date
2
+ 1.0, 2016-03-18
3
+ 1.1, 2018-04-30
4
+ 1.2,
@@ -10,7 +10,8 @@ require 'csvhuman/column'
10
10
  require 'csvhuman/converter'
11
11
  require 'csvhuman/reader'
12
12
 
13
-
13
+ require 'csvhuman/doc/helper.rb'
14
+ require 'csvhuman/doc/schema.rb'
14
15
 
15
16
 
16
17
  # say hello
@@ -18,4 +18,113 @@ HEADER_CONVERTERS = {
18
18
  gsub( /[^\w]+/, '' ).to_sym }
19
19
  }
20
20
 
21
+
22
+
23
+
24
## Guess the Ruby class to use for a column's values from its hashtag name and attributes.
##
##   name       - hashtag name without the leading # (e.g. 'date', 'affected', 'geo')
##   attributes - attribute names without the leading + (e.g. ['lat']); anything that
##                responds to include? and empty? works; nil is treated as "no attributes"
##
##   returns Integer, Float, Date or String (the default)
def self.guess_type( name, attributes )
  ## note: nil means "no attributes" - normalize once up front so that
  ##   include? below never gets called on nil (incl. in the date branch)
  attributes = []  if attributes.nil?

  if name == 'date'
    ## just the year (e.g. 2011)? use an integer number
    attributes.include?( 'year' ) ? Integer : Date
  ## todo/fix: add more well-known names with num required!!!
  elsif ['affected', 'inneed', 'targeted', 'reached', 'population'].include?( name )
    Integer
  elsif attributes.empty?
    String     ## assume (default to) string
  elsif attributes.include?( 'num' ) ||
        attributes.include?( 'id' )      ## assume id is (always) a rowid - why? why not?
    Integer
  elsif attributes.include?( 'date' )    ### todo/check: exists +date?
    Date
  elsif name == 'geo' &&
        ['lat', 'lon', 'elevation'].any? { |attrib| attributes.include?( attrib ) }
    Float
  elsif ['killed',   'injured', 'infected',
         'displaced', 'idps',   'refugees',
         'abducted', 'threatened',
         'affected', 'inneed',  'targeted', 'reached'].any? { |attrib| attributes.include?( attrib ) }
    Integer    ## head counts
  else
    String     ## assume (default to) string
  end
end
65
+
66
+
67
## Type mapping strategies by name:
##   every entry is a lambda that gets passed in (name, attributes)
##   and returns the class to use for the values
##
##  note: guess_type gets wrapped in a lambda (is there a better/idiomatic way?)
##    e.g. Kernel.method( :guess_type )
TYPE_MAPPINGS = {
  ## keep as is, that is, always returns string (assumes always string values)
  none:  ->(_name, _attributes) { String },
  guess: ->(tag_name, tag_attributes) { guess_type( tag_name, tag_attributes ) },
}

## add aliases for guess (yes, two of them - why? why not?)
##   note: the aliases share the very same lambda object with :guess
[:default, :all].each do |alias_name|
  TYPE_MAPPINGS[ alias_name ] = TYPE_MAPPINGS[ :guess ]
end
80
+
81
+
82
+
83
+
84
+
85
## Convert a text value to an Integer.
##
##   returns nil for nil or an empty string
##   raises ArgumentError if the text is not a valid decimal integer
def self.convert_to_i( value )
  if value.nil? || value.empty?
    nil   ## return nil - why? why not?
  else
    ## note: always pass in base 10 - otherwise Integer() honors radix
    ##   prefixes and a leading zero means octal ('010' => 8, '08' => error)
    Integer( value, 10 )
  end
end
92
+
93
## Convert a text value to a Float.
##
##   returns nil for nil or an empty string (why? why not?)
##   anything Float() cannot parse raises (ArgumentError)
def self.convert_to_f( value )
  return nil  if value.nil? || value.empty?

  ## todo/fix: add support for NaN, Inf, -Inf etc.
  ##   how to deal with conversion errors (throw exception? ignore? NaN? why? why not?)
  Float( value )
end
102
+
103
## Convert a text value to a Date.
##
##   supported formats:  2014-11-09  (year-month-day)
##                       09/11/2014  (day/month/year)
##
##   returns nil for nil, an empty string or text in any other format
##
##  todo/fix: add support for more formats
##    how to deal with conversion errors (throw exception? ignore? why? why not?)
def self.convert_to_date( value )
  return nil  if value.nil? || value.empty?

  ### todo: check if 2014-1-9 works for strptime too (leading zero required)?
  return Date.strptime( value, "%Y-%m-%d" )  if value =~ /\d{4}-\d{1,2}-\d{1,2}/        # 2014-11-09
  return Date.strptime( value, "%d/%m/%Y" )  if value =~ /\d{1,2}\/\d{1,2}\/\d{4}/      # 09/11/2014

  ## todo/fix: throw argument/value error - why? why not
  nil
end
119
+
120
+
121
+
122
## Value converters by (target) class:
##   every entry is a lambda that gets passed in a single value
##   and hands it on to the matching convert_to_* method
TYPE_CONVERTERS = {
  Integer => lambda { |raw| convert_to_i( raw ) },
  Float   => lambda { |raw| convert_to_f( raw ) },
  Date    => lambda { |raw| convert_to_date( raw ) },
}
127
+
128
+
129
+
21
130
  end # class CsvHuman
@@ -0,0 +1,108 @@
1
+ # encoding: utf-8
2
+
3
class CsvHuman
  ## Line matchers for converting the tags and attributes docs (txt to csv).
  ##
  ##  note: every match_* method prints a one-line trace (puts) for a
  ##   matching line and returns the MatchData (with named captures);
  ##   for a non-matching line match_hashtag returns nil and
  ##   all the other matchers return false
  module DocHelper

    ## hashtag on a line of its own e.g. #adm1
    ##   note: the name must be two or more characters long
    HASHTAG_LINE_RX = /^
                        \s*
                        \#
                        (?<name>[a-z][a-z0-9]+)
                        \s*
                      $/x

    ## returns the match (use m[:name]) or nil
    def match_hashtag( line )
      if (m=HASHTAG_LINE_RX.match(line))
        puts "hashtag >#{m[:name]}<"
        m
      else
        nil
      end
    end


    ## attribute on a line of its own e.g. +code
    ##   note: attrib might be one letter only (e.g.) +m,+f, etc.
    ATTRIBUTE_LINE_RX = /^
                          \s*
                          \+
                          (?<name>[a-z][a-z0-9]*)
                          \s*
                        $/x

    ## returns the match (use m[:name]) or false
    def match_attribute( line )
      if (m=ATTRIBUTE_LINE_RX.match(line))
        puts "attrib >#{m[:name]}<"
        m
      else
        false
      end
    end


    ##
    ##  two-level numbered heading
    ##   e.g. 1.1. Places
    ##        2.1. Sex- and-age disaggregation (SADD) attributes
    ##   note: both levels are a single digit (1-9)
    HEADING_LINE_RX = /^
                        \s*
                        (?<level1>[1-9])
                        \.
                        (?<level2>[1-9])
                        \.
                        \s+
                        (?<title>.+?)
                        \s*
                      $/x

    ## returns the match (use m[:level1], m[:level2], m[:title]) or false
    def match_heading( line )
      if (m=HEADING_LINE_RX.match(line))
        puts "heading #{m[:level1]}.#{m[:level2]}. (#{m[:level2]}) >#{m[:title]}<"
        m
      else
        false
      end
    end


    ## value type sentence (anywhere in the line) e.g. Every value must be a number.
    ##   note: the capture is NOT followed by an any-character dot (.) on purpose;
    ##     with a trailing dot the capture has to give up its last letter
    ##     if the type is the last word on the line (e.g. numbe for number)
    TYPE_RX = /Every value must be a (?<type>[a-z]+)/

    ## returns the match (use m[:type]) or false
    def match_type( line )
      if (m=TYPE_RX.match(line))
        puts "type: >#{m[:type]}<"
        m
      else
        false
      end
    end


    ## version note (anywhere in the line) e.g. Since HXL 1.1.
    ##   note: matches 1.x versions only; the closing period is optional
    SINCE_HXL_RX = /Since HXL (?<version>[1]\.[0-9])\.?/

    ## returns the match (use m[:version]) or false
    def match_since_hxl( line )
      if (m=SINCE_HXL_RX.match(line))
        puts "version: >#{m[:version]}<"
        m
      else
        false
      end
    end


    ## Split a description line into the text and the version.
    ##   returns [text, version]
    ##     with a "Since HXL 1.x" note: the note gets removed from the text
    ##       (and the text stripped)
    ##     without a note: the line gets returned as is and the version is '?'
    def split_descr( line )
      if (m=match_since_hxl( line ))
        version = m[:version]
        ## remove "Since HXL 1.0" from text
        text    = line.gsub( SINCE_HXL_RX, '' ).strip
      else
        version = '?'
        text    = line
      end
      [text,version]
    end

  end # module DocHelper
end # class CsvHuman
@@ -0,0 +1,151 @@
1
+ # encoding: utf-8
2
+
3
class CsvHuman
  ## tags and attributes (schema) reader / converter (txt to csv)
  class Doc
    include DocHelper


    ## Open the doc in path and return the rows from parse_attributes.
    def self.read_attributes( path )
      self.open( path ) { |doc| doc.parse_attributes }
    end

    ## Open the doc in path and return the rows from parse_tags.
    def self.read_tags( path )
      self.open( path ) { |doc| doc.parse_tags }
    end

    ## Open the file in path and wrap it in a doc.
    ##   mode - file open mode; defaults to 'r:bom|utf-8' (if nil/not passed in)
    ##
    ##   with a block:    calls the block with the doc, closes the file
    ##                      (even if the block raises) and returns the block's result
    ##   without a block: returns the doc
    ##                      note: caller responsible for closing (todo/fix: add close,closed? to doc!!!)
    def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?
      f   = File.open( path, mode || 'r:bom|utf-8' )
      doc = self.new( f )

      # handle blocks like Ruby's open(), not like the (old old) CSV library
      if block_given?
        begin
          block.call( doc )
        ensure
          f.close
        end
      else
        doc
      end
    end # method self.open


    def initialize( str_or_readable )
      # note: must (only) support/respond_to each_line
      @input = str_or_readable
    end


    ## Collect the attributes (+name) from the doc.
    ##   returns an array of rows, one per attribute:
    ##     [attribute, version, category, tags, description]
    ##
    ##     category    - "(level2) title" of the heading the attribute is listed under (or nil)
    ##     tags        - all hashtags listed for the attribute joined by a space (e.g. "#org #loc")
    ##     description - the first content line following the attribute line
    ##     version     - version taken from the description line (see split_descr)
    def parse_attributes
      attribs   = []

      category  = nil   ## latest heading seen
      next_line = nil   ## e.g. set to :descr

      ## attribute (record) being collected
      attrib          = nil
      attrib_category = nil
      descr           = nil
      version         = nil
      tags            = []

      each_content_line do |line|
        if next_line == :descr
          ## auto-capture line following the attribute line
          descr, version = split_descr( line )
          puts "descr >#{descr}<, version >#{version}<"

          next_line = nil
        elsif (m=match_heading( line ))
          category = "(#{m[:level2]}) #{m[:title]}"
        elsif (m=match_attribute( line ))
          attribs << [attrib, version, attrib_category, tags.join( ' ' ), descr]   if attrib

          attrib          = m[:name]
          ## note: remember the category NOW - the record only gets added once the
          ##   next attribute (or the end) is reached and by then a new heading
          ##   might have changed category to the next section already
          attrib_category = category
          descr           = nil
          version         = nil
          tags            = []
          next_line       = :descr   ## will auto-capture next line
        elsif (m=match_hashtag( line ))
          tags << "##{m[:name]}"
        end
      end

      attribs << [attrib, version, attrib_category, tags.join( ' ' ), descr]   if attrib

      attribs
    end # method parse_attributes


    ## Collect the tags (#name) from the doc.
    ##   returns an array of rows, one per tag:
    ##     [tag, type, version, category, attributes, description]
    ##
    ##     type        - value type from an "Every value must be a ..." line (or nil)
    ##     category    - "(level2) title" of the heading the tag is listed under (or nil)
    ##     attributes  - all attributes listed for the tag joined by a space (e.g. "+code +name")
    ##     description - the first content line following the tag line
    ##     version     - version taken from the description line (see split_descr)
    def parse_tags
      tags      = []

      category  = nil   ## latest heading seen
      next_line = nil   ## e.g. set to :descr

      ## tag (record) being collected
      tag          = nil
      tag_category = nil
      type         = nil
      descr        = nil
      version      = nil
      attribs      = []

      each_content_line do |line|
        if next_line == :descr
          ## auto-capture line following the tag line
          descr, version = split_descr( line )

          ## descr = "(2) People and households" if descr == "(2) Surveys and assessments"

          puts "descr >#{descr}<, version >#{version}<"

          next_line = nil
        elsif (m=match_heading( line ))
          category = "(#{m[:level2]}) #{m[:title]}"
        elsif (m=match_type( line ))
          type = m[:type]
        elsif (m=match_hashtag( line ))
          tags << [tag, type, version, tag_category, attribs.join( ' ' ), descr]   if tag

          tag          = m[:name]
          ## note: remember the category NOW (see parse_attributes) - a new heading
          ##   might change category before this record gets added
          tag_category = category
          type         = nil
          descr        = nil
          version      = nil
          attribs      = []
          next_line    = :descr   ## will auto-capture next line
        elsif (m=match_attribute( line ))
          attribs << "+#{m[:name]}"
        end
      end

      tags << [tag, type, version, tag_category, attribs.join( ' ' ), descr]   if tag

      tags
    end # method parse_tags


    private

    ## pass on every line with content, that is,
    ##   remove leading and trailing spaces (incl. newlines) and
    ##   skip blank lines and comment lines (starting with %)
    def each_content_line
      @input.each_line do |line|
        line = line.strip
        next if line.empty? || line.start_with?( '%' )

        yield line
      end
    end

  end # class Doc
end # class CsvHuman