csvhuman 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d8038f1824b8772b53f57f7dc00c67ca902fd872
4
- data.tar.gz: a95430def44f77428a32b4ceb80f12c96d078199
3
+ metadata.gz: eac10294bb4add6317df3fd1eb2dcab35f75315d
4
+ data.tar.gz: 2072e628e65873a95584fa70fec0bf11a66e2322
5
5
  SHA512:
6
- metadata.gz: a2ee778e98288478930b92cb066b8d8ee65da61dca3d911e3f3de12c5ee3a0d0f3451db5ea1426c1c59a5a8cd6fc9ecb2bb509d8982e79b1ff0ce8d8c353765e
7
- data.tar.gz: e02189132d596e2b5abc654f55c9dcf925ed96d0d5008b8b2eba956cdfc0044827c374409e2e22e45c95791ed5db256c441210a36d81172c53f3178007f8cc40
6
+ metadata.gz: 931ada0926ae767b7b2348c237694ec35222ff06dfdbac7298527744c0ab99444778d5031355b106ed148551646e301c12d2cc32df8b0dd7001e2b91003c259f
7
+ data.tar.gz: 0ba952f578bfafb466a99073792d9c0173b24bf689571ec140ed8e61a7ef00f7addea40979bb8ce495d1a8a1266ce01845f5e551653f02afb15c0ea516021bf3
@@ -2,20 +2,38 @@ HISTORY.md
2
2
  Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
+ config/attributes.csv
6
+ config/langs.csv
7
+ config/tags.csv
8
+ config/types.csv
9
+ config/versions.csv
5
10
  lib/csvhuman.rb
6
11
  lib/csvhuman/base.rb
7
12
  lib/csvhuman/column.rb
8
13
  lib/csvhuman/converter.rb
14
+ lib/csvhuman/doc/helper.rb
15
+ lib/csvhuman/doc/schema.rb
9
16
  lib/csvhuman/reader.rb
10
17
  lib/csvhuman/tag.rb
11
18
  lib/csvhuman/version.rb
19
+ test/data/airports.csv
20
+ test/data/ebola.csv
21
+ test/data/hdx/ebola_treatment_centres.csv
22
+ test/data/hdx/phl_haima_houses_damaged.csv
23
+ test/data/hdx/zika_cases.csv
12
24
  test/data/sample1.csv
13
25
  test/data/sample2.csv
14
26
  test/data/sample3.csv
15
27
  test/data/sample4.csv
16
28
  test/data/test.csv
29
+ test/data/unhcr.csv
17
30
  test/helper.rb
31
+ test/test_doc.rb
32
+ test/test_hdx.rb
18
33
  test/test_header_converter.rb
34
+ test/test_misc.rb
19
35
  test/test_reader.rb
20
36
  test/test_samples.rb
21
37
  test/test_tags.rb
38
+ test/test_type_converters.rb
39
+ test/test_type_mappings.rb
@@ -0,0 +1,54 @@
1
+ attribute,since,category,tags,description
2
+ adolescents,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adolescents, loosely defined (precise age range varies); may overlap +children and +adult. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adolescents +age12_17""."
3
+ adults,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adults, loosely defined (precise age range varies); may overlap +adolescents and +elderly. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adults +age18_64""."
4
+ children,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"The associated hashtag applies to non-adults, loosely defined (precise age range varies; may overlap +infants and +adolescents). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+children +age3_11""."
5
+ elderly,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Elderly people, loosely defined (precise age range varies). May overlap +adults. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+elderly +age65plus""."
6
+ f,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Female people. See also +m and +i.
7
+ hh,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Households (vs +ind for individual people). The exact definition of ""household"" may vary among aid organisations."
8
+ i,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Intersex or non-gender-binary people. Use this attribute for any groups who do not identify as male (+m) or female (+f).
9
+ ind,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Individual people (vs +hh for households).
10
+ infants,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Infant children, loosely defined (precise age range varies; may overlap +children). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+infants +age0_2""."
11
+ m,1.0,(2) Organisation and activity attributes,#affected #inneed #population #reached #targeted,Male people. See also +f and +i.
12
+ activity,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as an ""activity"" proper (may imply different hierarchical levels in different contexts)."
13
+ cluster,1.1,(2) Organisation and activity attributes,#sector,Identifies a sector as a formal IASC humanitarian cluster.
14
+ funder,1.0,(2) Organisation and activity attributes,#org,Funding org/agency (e.g. donor).
15
+ impl,1.0,(2) Organisation and activity attributes,#org,Implementing partner.
16
+ prog,1.0,(2) Organisation and activity attributes,#org,Programming org/agency.
17
+ programme,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""programme"" (may imply different hierarchical levels in different contexts)."
18
+ project,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""project"" (may imply different hierarchical levels in different contexts)."
19
+ provided,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that has been provided to people in need."
20
+ used,1.1,(3) Classification attributes,#item #service,"Refers to a #service, #item, etc. that affected people have actually consumed or otherwise taken advantage of."
21
+ code,1.0,(3) Classification attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #country #crisis #currency #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"A unique, machine-readable code."
22
+ type,1.0,(4) Geographical attributes,#access #activity #beneficiary #capacity #cause #contact #crisis #description #event #frequency #group #impact #indicator #item #loc #need #operations #org #output #sector #service #severity #subsector,"Types or categories. Use with #org, #loc, #indicator, etc to provide classification information."
23
+ bounds,1.0,(4) Geographical attributes,#geo,Boundary data (e.g. inline GeoJSON).
24
+ coord,1.0,(4) Geographical attributes,#geo,Geodetic coordinates (lat+lon together).
25
+ dest,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,Place of destination (intended or actual).
26
+ elevation,1.0,(4) Geographical attributes,#geo,Elevation above sea level (usually metres).
27
+ lat,1.0,(4) Geographical attributes,#geo,Latitude (decimal degrees preferred).
28
+ lon,1.0,(4) Geographical attributes,#geo,Longitude (decimal degrees preferred).
29
+ origin,1.1,(5) Date attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,"The data describes places of origin (intended or actual), e.g. the country of origin for displaced people."
30
+ approved,1.0,(5) Date attributes,#date,Date or time when something was approved.
31
+ canceled,1.0,(5) Date attributes,#date,Date or time when something (e.g. an #activity) was canceled.
32
+ converted,1.1,(5) Date attributes,#date,Date or time used for converting a monetary value to another currency.
33
+ end,1.0,(5) Date attributes,#date,Date or time when something finished or will finish.
34
+ occurred,1.0,(5) Date attributes,#date,Date or time when something took place.
35
+ reported,1.0,(5) Date attributes,#date,Date or time when the information was reported.
36
+ start,1.0,(6) Impact attributes,#date,Date or time when something started or will start.
37
+ abducted,1.1,(6) Impact attributes,#affected #inneed #reached #targeted,Hashtag refers to people who have been abducted.
38
+ displaced,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Displaced people or households. Refers to all types of displacement: use +idps or +refugees to be more specific.
39
+ idps,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Internally-displaced people or households. More specific than +displaced.
40
+ incamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Located in camps.
41
+ infected,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People infected with a disease.
42
+ injured,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People injured.
43
+ killed,1.0,(6) Impact attributes,#affected,People killed.
44
+ noncamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Not located in camps.
45
+ refugees,1.0,(7) General attributes,#affected #inneed #population #reached #targeted,Refugee people or households. More specific than +displaced.
46
+ email,1.0,(7) General attributes,#contact,Email address.
47
+ id,1.1,(7) General attributes,#meta,Use with #meta to provide internal identifiers for data records.
48
+ label,1.0,(7) General attributes,,Text labels (for a table or chart).
49
+ name,1.0,(7) General attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #contact #country #crisis #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"Human-readable name, title, or label."
50
+ num,1.0,(7) General attributes,#capacity #indicator #output,"The data consists of quantitative, numeric information."
51
+ phone,1.0,(7) General attributes,#contact,The data consists of #contact phone numbers.
52
+ source,1.0,(7) General attributes,#meta,Information source for the data in the row or record.
53
+ text,1.0,(7) General attributes,#indicator,"The data consists of qualitative, narrative textual information."
54
+ url,1.0,(7) General attributes,#activity #contact #meta #org #service,"The data consists of web links related to the main hashtag (e.g. for an #org, #service, #activity, #loc, etc)."
@@ -0,0 +1,14 @@
1
+ code, name+en, name
2
+ ar, Arabic,
3
+ en, English, English
4
+ es, Spanish, Español
5
+ de, German, Deutsch
6
+ fa, Dari / Farsi / Persian,
7
+ fr, French,
8
+ ms, Malay,
9
+ ps, Pashto,
10
+ ru, Russian,
11
+ sw, Swahili,
12
+ tl, Tagalog,
13
+ uk, Ukrainian,
14
+ ur, Urdu,
@@ -0,0 +1,45 @@
1
+ tag,type,since,category,attributes,description
2
+ adm1,,1.0,(1) Places,+code +dest +name +origin,Top-level subnational administrative area (e.g. a governorate in Syria).
3
+ adm2,,1.0,(1) Places,+code +dest +name +origin,Second-level subnational administrative area (e.g. a subdivision in Bangladesh).
4
+ adm3,,1.0,(1) Places,+code +dest +name +origin,Third-level subnational administrative area (e.g. a subdistrict in Afghanistan).
5
+ adm4,,1.0,(1) Places,+code +dest +name +origin,Fourth-level subnational administrative area (e.g. a barangay in the Philippines).
6
+ adm5,,1.0,(1) Places,+code +dest +name +origin,Fifth-level subnational administrative area (e.g. a ward of a city).
7
+ country,,1.0,(1) Places,+code +dest +name +origin,Country (often left implied in a dataset). Also sometimes known as admin level 0.
8
+ geo,,1.0,(1) Places,+bounds +coord +elevation +lat +lon,"Geodetic geometry information (points, lines, shapes). Use for latitude and longitude, as well as bounds information."
9
+ loc,,1.0,(1) Places,+code +dest +name +origin +type,"Any general location, such as a village, camp, or clinic."
10
+ region,,1.0,(2) People and households,+code +dest +name +origin,"A broad, supra- or cross-national geographical region (e.g. Sahel, Horn of Africa, Central Asia, Caribbean). Not to be confused with ""region"" used as the name of a subnational area (#adm1) in some countries."
11
+ affected,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +killed +m +noncamp +refugees,Number of people or households affected by an emergency. Subset of #population; superset of #inneed.
12
+ beneficiary,,1.0,(2) People and households,+code +name +type,"General (non-numeric) information about a person or group meant to benefit from aid activities, e.g. ""lactating women""."
13
+ inneed,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households in need of humanitarian assistance. Subset of #affected; superset of #targeted.
14
+ population,number,1.0,(2) People and households,+adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +m +noncamp +refugees,"General population number for an area or location, regardless of their specific humanitarian needs."
15
+ reached,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households reached with humanitarian assistance. Subset of #targeted.
16
+ respondee,,1.1,(2) People and households,,"Descriptive information, such as name, identifier, or traits, for a single respondee (person, household, etc.) in survey-style data."
17
+ targeted,number,1.0,(3) Responses and other operations,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households targeted for humanitarian assistance. Subset of #inneed; superset of #reached.
18
+ access,,1.1,(3) Responses and other operations,+type,"Accessibility and constraints on access to a market, distribution point, facility, etc."
19
+ activity,,1.0,(3) Responses and other operations,+activity +code +name +programme +project +type +url,"A programme, project, or other activity. This hashtag applies to all levels; use the attributes +activity, +project, or +programme to distinguish different hierarchical levels."
20
+ capacity,,1.0,(3) Responses and other operations,+num +type,"The response capacity of the entity being described (e.g. ""25 beds"")."
21
+ contact,,1.0,(3) Responses and other operations,+email +name +phone +type +url,Contact information for the subject of a data record (e.g. an activity).
22
+ frequency,,1.1,(3) Responses and other operations,+type,The frequency with which something occurs.
23
+ indicator,,1.0,(3) Responses and other operations,+code +name +num +text +type,"A general hashtag for an indicator being tracked. See also #output, #capacity, #need, #impact, #severity, #affected, #inneed, #targeted, and #reached for more-specific indicator-related hashtags."
24
+ item,,1.1,(3) Responses and other operations,+code +name +provided +type +used,"Physical things provided, stored, shipped, available, used, etc."
25
+ need,,1.1,(3) Responses and other operations,+code +name +type,"A(n) (unfulfilled) need for an affected person, household, group, or population."
26
+ operations,,1.0,(3) Responses and other operations,+type,"Information that affects humanitarian operations, such as a restriction on movement or road closure."
27
+ org,,1.0,(3) Responses and other operations,+code +funder +impl +name +prog +type +url,"An organisation contributing to a humanitarian emergency response, e.g. a local government, community-based organisation, NGO, agency, donor, or law-enforcement or military unit. Use #group for organisations that are not part of the emergency response (e.g. a paramilitary group)."
28
+ output,,1.0,(3) Responses and other operations,+code +name +num +type,"An output indicator (e.g. ""number of water-purification kits distributed""). A more-specific alternative to #indicator, especially for 3W-style activity reports."
29
+ sector,,1.0,(3) Responses and other operations,+cluster +code +name +type,A humanitarian cluster or sector.
30
+ service,,1.1,(3) Responses and other operations,+code +name +provided +type +url +used,"A service used or needed by an affected person, household, group, or population."
31
+ subsector,,1.0,(4) Cash and finance,+code +name +type,A humanitarian subsector.
32
+ channel,,1.1,(4) Cash and finance,+code +name,The detailed method of delivering aid (e.g. smartcard vs mobile transfer). More specific than #modality.
33
+ currency,,1.1,(4) Cash and finance,+code,"Name or ISO 4217 currency code for all financial #value cells in the row (e.g. ""EUR""). Typically used together with #value in financial or cash data."
34
+ modality,,1.1,(4) Cash and finance,+code +name,"The means by which an aid activity is accomplished. For cash transfers, values might include ""cash"", ""vouchers"", ""in-kind"", etc. May also be used for other types of modalities in other contexts."
35
+ value,number,1.1,"(5) Crises, incidents, and events",,"A monetary value, such as the price of goods in a market, a project budget, or the amount of cash transferred to beneficiaries. May be used together with #currency in financial or cash data."
36
+ cause,,1.0,"(5) Crises, incidents, and events",+code +name +type,"The cause of an event, crisis, etc."
37
+ crisis,,1.0,"(5) Crises, incidents, and events",+code +name +type,A humanitarian emergency.
38
+ event,,1.0,"(5) Crises, incidents, and events",+code +name +type,"An individual event or incident within a crisis/emergency, such as a (localised) flood, bridge collapse, or conflict."
39
+ group,,1.0,"(5) Crises, incidents, and events",+code +name +type,"A non-humanitarian group (of any type) related to a humanitarian crisis (e.g., a paramilitary group). Use #org instead for a humanitarian organisation such as an NGO, contributing to the humanitarian response."
40
+ impact,,1.0,"(5) Crises, incidents, and events",+code +name +type,The impact of a crisis on a group or other entity.
41
+ severity,,1.0,(6) Metadata,+code +name +type,Severity of the crisis or event.
42
+ date,date,1.0,(6) Metadata,+approved +canceled +converted +end +occurred +reported +start,"Date to which the data in the record applies. Preferred format is ISO 8601 (e.g. ""2015-06-01"", ""2015-Q1"", etc.)"
43
+ description,,1.0,(6) Metadata,+type,Long description for a data record.
44
+ meta,,1.0,(6) Metadata,+id +source +url,Metadata about a row.
45
+ status,,1.0,(6) Metadata,+code +name,"Project/activity status description (such as ""planned"", ""active"", ""canceled"", or ""complete"")."
@@ -0,0 +1,7 @@
1
+ type,description
2
+ text,
3
+ number,
4
+ url,
5
+ email,
6
+ phone,
7
+ date,
@@ -0,0 +1,4 @@
1
+ version, date
2
+ 1.0, 2016-03-18
3
+ 1.1, 2018-04-30
4
+ 1.2,
@@ -10,7 +10,8 @@ require 'csvhuman/column'
10
10
  require 'csvhuman/converter'
11
11
  require 'csvhuman/reader'
12
12
 
13
-
13
+ require 'csvhuman/doc/helper.rb'
14
+ require 'csvhuman/doc/schema.rb'
14
15
 
15
16
 
16
17
  # say hello
@@ -18,4 +18,113 @@ HEADER_CONVERTERS = {
18
18
  gsub( /[^\w]+/, '' ).to_sym }
19
19
  }
20
20
 
21
+
22
+
23
+
24
## Guess the value type (Ruby class) of a column from its hashtag.
##
##   name       - tag name without the leading "#" (e.g. 'date', 'affected')
##   attributes - attribute names without the leading "+" (e.g. ['num']);
##                may be nil or empty for a tag without attributes
##
## Returns Integer, Float, Date or String (String is the fallback).
def self.guess_type( name, attributes )
  ## note: treat "no attributes" (nil) like an empty list in ALL branches;
  ##   otherwise guess_type( 'date', nil ) fails with a NoMethodError
  attributes ||= []

  if name == 'date'
    ## just the year (e.g. 2011)? use an integer number
    attributes.include?( 'year' ) ? Integer : Date
  ## todo/fix: add more well-known names with num required!!!
  elsif ['affected', 'inneed', 'targeted', 'reached', 'population'].include?( name )
    Integer
  elsif attributes.empty?
    String     ## assume (default to) string
  elsif attributes.include?( 'num' ) ||
        attributes.include?( 'id' )     ## assume id is (always) a rowid - why? why not?
    Integer
  elsif attributes.include?( 'date' )   ### todo/check: exists +date?
    Date
  elsif name == 'geo' &&
        ['lat', 'lon', 'elevation'].any? { |attribute| attributes.include?( attribute ) }
    Float
  elsif ['killed', 'injured', 'infected', 'displaced',
         'idps', 'refugees', 'abducted', 'threatened',
         'affected', 'inneed', 'targeted', 'reached'].any? { |attribute| attributes.include?( attribute ) }
    Integer    ## counts of people
  else
    String     ## assume (default to) string
  end
end
65
+
66
+
67
+ ## convert guess_type to proc (is there a better/idiomatic way?)
68
+ # ->(name, attributes) { guess_type( name, attributes ) }
69
+ ## TYPE_MAPPING_GUESS = Kernel.method( :guess_type )
70
+
71
## Built-in strategies for mapping a column (tag name + attributes) to a type.
## Every strategy is a lambda taking (name, attributes) and returning a class.
TYPE_MAPPINGS = {
  ## keep every value as is (that is, always map to string)
  none:  lambda { |_name, _attributes| String },
  guess: lambda { |name, attributes| guess_type( name, attributes ) },
}

## add aliases for guess (check - is there a better/idiomatic way?)
##   note: all aliases share the very same lambda object
[:default, :all].each { |key| TYPE_MAPPINGS[ key ] = TYPE_MAPPINGS[:guess] }
80
+
81
+
82
+
83
+
84
+
85
## Convert a cell value (string) to an Integer.
##
##   nil and the empty string are returned as nil (no value).
##   Digits are always read as base 10, that is, "010" is 10 (not octal 8)
##   and "08" is 8; radix prefixes such as "0x1A" are rejected.
##
## Raises ArgumentError if the value is not a valid (decimal) integer.
def self.convert_to_i( value )
  if value.nil? || value.empty?
    nil   ## return nil - why? why not?
  else
    ## note: pass in the base - Integer( "010" ) would guess octal from the leading zero
    Integer( value, 10 )
  end
end
92
+
93
## Convert a cell value (string) to a Float.
##
##   nil and the empty string are returned as nil (no value).
##   The special values NaN, Inf / Infinity (with optional + sign) and
##   -Inf / -Infinity are accepted in any letter case.
##
## Raises ArgumentError for everything else that Float() cannot read.
def self.convert_to_f( value )
  return nil  if value.nil? || value.empty?   ## return nil - why? why not?

  ## note: Float() itself rejects the special (not-a-number/infinity) spellings
  case value.strip.downcase
  when 'nan'                                   then Float::NAN
  when 'inf', '+inf', 'infinity', '+infinity'  then Float::INFINITY
  when '-inf', '-infinity'                     then -Float::INFINITY
  else
    ## how to deal with conversion errors (throw exception? ignore? NaN? why? why not?)
    Float( value )
  end
end
102
+
103
## Convert a cell value (string) to a Date.
##
## Supported formats (leading and trailing spaces get ignored):
##   2014-11-09   - year-month-day
##   09/11/2014   - day/month/year
##
## Returns nil for nil, the empty string and for values that do not
##   start with one of the supported formats.
## Raises ArgumentError (from Date.strptime) if the format matches
##   but the date itself is invalid (e.g. 2014-02-31).
def self.convert_to_date( value )
  return nil  if value.nil? || value.empty?   ## return nil - why? why not?

  text = value.strip

  ## todo/fix: add support for more formats
  ## how to deal with conversion errors (throw exception? ignore? why? why not?)
  ##
  ## note: patterns are anchored at the start (\A); a date somewhere
  ##   inside other text (e.g. "as of 2014-11-09") must not select a format
  ##   that Date.strptime (parsing from the first character) cannot read
  if text =~ /\A\d{4}-\d{1,2}-\d{1,2}/    ### todo: check if 2014-1-9 works for strptime too (leading zero required)?
    Date.strptime( text, "%Y-%m-%d" )     # 2014-11-09
  elsif text =~ %r{\A\d{1,2}/\d{1,2}/\d{4}}
    Date.strptime( text, "%d/%m/%Y" )     # 09/11/2014
  else
    ## todo/fix: throw argument/value error - why? why not
    nil
  end
end
119
+
120
+
121
+
122
## Value converters by target type (class); every converter is a
##   lambda taking the (string) cell value and returning the converted value.
##   note: String has no entry (values are kept as is).
TYPE_CONVERTERS = {
  Integer => lambda { |value| convert_to_i( value ) },
  Float   => lambda { |value| convert_to_f( value ) },
  Date    => lambda { |value| convert_to_date( value ) },
}
127
+
128
+
129
+
21
130
  end # class CsvHuman
@@ -0,0 +1,108 @@
1
+ # encoding: utf-8
2
+
3
class CsvHuman
## Line matchers for the plain text version of the HXL tag and
##  attribute documentation; mixed into the Doc reader.
##
##  note: every matcher prints a debug line to stdout on a match.
module DocHelper

  ## e.g. #adm1  (hashtag on a line of its own)
  HASHTAG_LINE_RX = /^
                      \s*
                      \#
                      (?<name>[a-z][a-z0-9]+)
                      \s*
                    $/x

  ## Returns the match (with name) for a hashtag line or nil.
  def match_hashtag( line )
    if (m=HASHTAG_LINE_RX.match(line))
      puts "hashtag >#{m[:name]}<"
      m
    else
      nil
    end
  end


  ## e.g. +code  (attribute on a line of its own)
  ## note: attrib might be one letter only (e.g.) +m,+f, etc.
  ATTRIBUTE_LINE_RX = /^
                        \s*
                        \+
                        (?<name>[a-z][a-z0-9]*)
                        \s*
                      $/x

  ## Returns the match (with name) for an attribute line or false.
  def match_attribute( line )
    if (m=ATTRIBUTE_LINE_RX.match(line))
      puts "attrib >#{m[:name]}<"
      m
    else
      false
    end
  end


  ##
  ## e.g. 1.1. Places
  ##      2.1. Sex- and-age disaggregation (SADD) attributes
  ##
  ## note: levels may have more than one digit (e.g. 2.10.)
  HEADING_LINE_RX = /^
                      \s*
                      (?<level1>[1-9][0-9]*)
                      \.
                      (?<level2>[1-9][0-9]*)
                      \.
                      \s+
                      (?<title>.+?)
                      \s*
                    $/x

  ## Returns the match (with level1, level2, title) for a heading line or false.
  def match_heading( line )
    if (m=HEADING_LINE_RX.match(line))
      puts "heading #{m[:level1]}.#{m[:level2]}. (#{m[:level2]}) >#{m[:title]}<"
      m
    else
      false
    end
  end


  ## e.g. Every value must be a number.
  ## note: the trailing period is a literal (and optional) dot;
  ##   an unescaped dot (any character) cuts off the last letter
  ##   of the type if the period is missing (e.g. date => dat)
  TYPE_RX = /Every value must be a (?<type>[a-z]+)\.?/

  ## Returns the match (with type) if the line states the value type or false.
  def match_type( line )
    if (m=TYPE_RX.match(line))
      puts "type: >#{m[:type]}<"
      m
    else
      false
    end
  end


  ## e.g. Since HXL 1.1.
  SINCE_HXL_RX = /Since HXL (?<version>[1]\.[0-9])\.?/

  ## Returns the match (with version) if the line includes a since version note or false.
  def match_since_hxl( line )
    if (m=SINCE_HXL_RX.match(line))
      puts "version: >#{m[:version]}<"
      m
    else
      false
    end
  end


  ## Split a description line into the text and the version.
  ##
  ## Returns [text, version]; the "Since HXL 1.x" note gets removed from
  ##   the text; version is '?' (and the text is kept as is) if there is no note.
  def split_descr( line )
    m = match_since_hxl( line )
    if m
      ## remove "Since HXL 1.0" from text
      [line.gsub( SINCE_HXL_RX, '' ).strip, m[:version]]
    else
      [line, '?']
    end
  end

end # module DocHelper
end # class CsvHuman
@@ -0,0 +1,151 @@
1
+ # encoding: utf-8
2
+
3
class CsvHuman
## Tags and attributes (schema) reader / converter (txt to csv);
##   parses the plain text version of the tag / attribute documentation
##   into records (arrays) - one per attribute or tag.
class Doc
  include DocHelper


  ## Open the file at path and return the records from parse_attributes.
  def self.read_attributes( path )
    self.open( path ) { |doc| doc.parse_attributes }
  end

  ## Open the file at path and return the records from parse_tags.
  def self.read_tags( path )
    self.open( path ) { |doc| doc.parse_tags }
  end

  ## Open the file at path and wrap it into a doc (reader).
  ##
  ##   with a block    - calls the block with the doc, closes the file
  ##                     and returns the result of the block
  ##   without a block - returns the doc
  def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?

    ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
    f   = File.open( path, mode ? mode : 'r:bom|utf-8' )
    doc = self.new( f )

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    if block_given?
      begin
        block.call( doc )
      ensure
        f.close
      end
    else
      doc    ## note: caller responsible for closing (todo/fix: add close,closed? to doc!!!)
    end
  end # method self.open


  def initialize( str_or_readable )
    # note: must (only) support/respond_to each_line
    @input = str_or_readable
  end


  ## Parse the attributes documentation.
  ##
  ## Returns an array of records:
  ##   [attribute, version, category, tags (joined by space), description]
  def parse_attributes
    records      = []
    current      = nil     ## record in progress (nil before the first +attribute line)
    category     = nil     ## from the latest heading line
    expect_descr = false   ## true if the next line is the description

    each_content_line do |line|
      if expect_descr
        ## auto-capture the line following the +attribute line
        current[:descr], current[:version] = split_descr( line )
        puts "descr >#{current[:descr]}<, version >#{current[:version]}<"

        expect_descr = false
      elsif (m=match_heading( line ))
        category = "(#{m[:level2]}) #{m[:title]}"
      elsif (m=match_attribute( line ))
        ## note: record the category in effect NOW (at the start of the record);
        ##   looking it up later (when the next record starts) is too late
        ##   if a new heading follows - the last record of a section
        ##   would end up in the category of the next section
        current = { name: m[:name], category: category, tags: [] }
        records << current

        expect_descr = true
      elsif (m=match_hashtag( line ))
        current[:tags] << "##{m[:name]}"   if current
      end
    end

    records.map do |rec|
      [rec[:name], rec[:version], rec[:category], rec[:tags].join( ' ' ), rec[:descr]]
    end
  end # method parse_attributes


  ## Parse the tags documentation.
  ##
  ## Returns an array of records:
  ##   [tag, type, version, category, attributes (joined by space), description]
  def parse_tags
    records      = []
    current      = nil     ## record in progress (nil before the first #tag line)
    category     = nil     ## from the latest heading line
    expect_descr = false   ## true if the next line is the description

    each_content_line do |line|
      if expect_descr
        ## auto-capture the line following the #tag line
        current[:descr], current[:version] = split_descr( line )
        puts "descr >#{current[:descr]}<, version >#{current[:version]}<"

        expect_descr = false
      elsif (m=match_heading( line ))
        category = "(#{m[:level2]}) #{m[:title]}"
      elsif (m=match_type( line ))
        current[:type] = m[:type]   if current
      elsif (m=match_hashtag( line ))
        ## note: record the category in effect NOW (see parse_attributes)
        current = { name: m[:name], category: category, attribs: [] }
        records << current

        expect_descr = true
      elsif (m=match_attribute( line ))
        current[:attribs] << "+#{m[:name]}"   if current
      end
    end

    records.map do |rec|
      [rec[:name], rec[:type], rec[:version], rec[:category], rec[:attribs].join( ' ' ), rec[:descr]]
    end
  end # method parse_tags


private

  ## Yield every line of the input with leading and trailing spaces
  ##   (incl. newlines) removed; skips blank lines and comment lines (%).
  def each_content_line
    @input.each_line do |line|
      line = line.strip
      next if line.empty? || line.start_with?( '%' )

      yield( line )
    end
  end

end # class Doc
end # class CsvHuman