csvhuman 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +18 -0
- data/config/attributes.csv +54 -0
- data/config/langs.csv +14 -0
- data/config/tags.csv +45 -0
- data/config/types.csv +7 -0
- data/config/versions.csv +4 -0
- data/lib/csvhuman/base.rb +2 -1
- data/lib/csvhuman/converter.rb +109 -0
- data/lib/csvhuman/doc/helper.rb +108 -0
- data/lib/csvhuman/doc/schema.rb +151 -0
- data/lib/csvhuman/tag.rb +49 -86
- data/lib/csvhuman/version.rb +2 -2
- data/test/data/airports.csv +6 -0
- data/test/data/ebola.csv +76 -0
- data/test/data/hdx/ebola_treatment_centres.csv +94 -0
- data/test/data/hdx/phl_haima_houses_damaged.csv +165 -0
- data/test/data/hdx/zika_cases.csv +303 -0
- data/test/data/unhcr.csv +85 -0
- data/test/test_doc.rb +129 -0
- data/test/test_hdx.rb +28 -0
- data/test/test_misc.rb +28 -0
- data/test/test_type_converters.rb +42 -0
- data/test/test_type_mappings.rb +57 -0
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eac10294bb4add6317df3fd1eb2dcab35f75315d
|
4
|
+
data.tar.gz: 2072e628e65873a95584fa70fec0bf11a66e2322
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 931ada0926ae767b7b2348c237694ec35222ff06dfdbac7298527744c0ab99444778d5031355b106ed148551646e301c12d2cc32df8b0dd7001e2b91003c259f
|
7
|
+
data.tar.gz: 0ba952f578bfafb466a99073792d9c0173b24bf689571ec140ed8e61a7ef00f7addea40979bb8ce495d1a8a1266ce01845f5e551653f02afb15c0ea516021bf3
|
data/Manifest.txt
CHANGED
@@ -2,20 +2,38 @@ HISTORY.md
|
|
2
2
|
Manifest.txt
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
|
+
config/attributes.csv
|
6
|
+
config/langs.csv
|
7
|
+
config/tags.csv
|
8
|
+
config/types.csv
|
9
|
+
config/versions.csv
|
5
10
|
lib/csvhuman.rb
|
6
11
|
lib/csvhuman/base.rb
|
7
12
|
lib/csvhuman/column.rb
|
8
13
|
lib/csvhuman/converter.rb
|
14
|
+
lib/csvhuman/doc/helper.rb
|
15
|
+
lib/csvhuman/doc/schema.rb
|
9
16
|
lib/csvhuman/reader.rb
|
10
17
|
lib/csvhuman/tag.rb
|
11
18
|
lib/csvhuman/version.rb
|
19
|
+
test/data/airports.csv
|
20
|
+
test/data/ebola.csv
|
21
|
+
test/data/hdx/ebola_treatment_centres.csv
|
22
|
+
test/data/hdx/phl_haima_houses_damaged.csv
|
23
|
+
test/data/hdx/zika_cases.csv
|
12
24
|
test/data/sample1.csv
|
13
25
|
test/data/sample2.csv
|
14
26
|
test/data/sample3.csv
|
15
27
|
test/data/sample4.csv
|
16
28
|
test/data/test.csv
|
29
|
+
test/data/unhcr.csv
|
17
30
|
test/helper.rb
|
31
|
+
test/test_doc.rb
|
32
|
+
test/test_hdx.rb
|
18
33
|
test/test_header_converter.rb
|
34
|
+
test/test_misc.rb
|
19
35
|
test/test_reader.rb
|
20
36
|
test/test_samples.rb
|
21
37
|
test/test_tags.rb
|
38
|
+
test/test_type_converters.rb
|
39
|
+
test/test_type_mappings.rb
|
@@ -0,0 +1,54 @@
|
|
1
|
+
attribute,since,category,tags,description
|
2
|
+
adolescents,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adolescents, loosely defined (precise age range varies); may overlap +children and +adult. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adolescents +age12_17""."
|
3
|
+
adults,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adults, loosely defined (precise age range varies); may overlap +adolescents and +elderly. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adults +age18_64""."
|
4
|
+
children,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"The associated hashtag applies to non-adults, loosely defined (precise age range varies; may overlap +infants and +adolescents). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+children +age3_11""."
|
5
|
+
elderly,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Elderly people, loosely defined (precise age range varies). May overlap +adults. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+elderly +age65plus""."
|
6
|
+
f,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Female people. See also +m and +i.
|
7
|
+
hh,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Households (vs +ind for individual people). The exact definition of ""household"" may vary among aid organisations."
|
8
|
+
i,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Intersex or non-gender-binary people. Use this attribute for any groups who do not identify as male (+m) or female (+f).
|
9
|
+
ind,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Individual people (vs +hh for households).
|
10
|
+
infants,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Infant children, loosely defined (precise age range varies; may overlap +children). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+infants +age0_2""."
|
11
|
+
m,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Male people. See also +f and +i.
|
12
|
+
activity,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as an ""activity"" proper (may imply different hierarchical levels in different contexts)."
|
13
|
+
cluster,1.1,(2) Organisation and activity attributes,#sector,Identifies a sector as a formal IASC humanitarian cluster.
|
14
|
+
funder,1.0,(2) Organisation and activity attributes,#org,Funding org/agency (e.g. donor).
|
15
|
+
impl,1.0,(2) Organisation and activity attributes,#org,Implementing partner.
|
16
|
+
prog,1.0,(2) Organisation and activity attributes,#org,Programming org/agency.
|
17
|
+
programme,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""programme"" (may imply different hierarchical levels in different contexts)."
|
18
|
+
project,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""project"" (may imply different hierarchical levels in different contexts)."
|
19
|
+
provided,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that has been provided to people in need."
|
20
|
+
used,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that affected people have actually consumed or otherwise taken advantage of."
|
21
|
+
code,1.0,(3) Classification attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #country #crisis #currency #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"A unique, machine-readable code."
|
22
|
+
type,1.0,(3) Classification attributes,#access #activity #beneficiary #capacity #cause #contact #crisis #description #event #frequency #group #impact #indicator #item #loc #need #operations #org #output #sector #service #severity #subsector,"Types or categories. Use with #org, #loc, #indicator, etc to provide classification information."
|
23
|
+
bounds,1.0,(4) Geographical attributes,#geo,Boundary data (e.g. inline GeoJSON).
|
24
|
+
coord,1.0,(4) Geographical attributes,#geo,Geodetic coordinates (lat+lon together).
|
25
|
+
dest,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,Place of destination (intended or actual).
|
26
|
+
elevation,1.0,(4) Geographical attributes,#geo,Elevation above sea level (usually metres).
|
27
|
+
lat,1.0,(4) Geographical attributes,#geo,Latitude (decimal degrees preferred).
|
28
|
+
lon,1.0,(4) Geographical attributes,#geo,Longitude (decimal degrees preferred).
|
29
|
+
origin,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,"The data describes places of origin (intended or actual), e.g. the country of origin for displaced people."
|
30
|
+
approved,1.0,(5) Date attributes,#date,Date or time when something was approved.
|
31
|
+
canceled,1.0,(5) Date attributes,#date,Date or time when something (e.g. an #activity) was canceled.
|
32
|
+
converted,1.1,(5) Date attributes,#date,Date or time used for converting a monetary value to another currency.
|
33
|
+
end,1.0,(5) Date attributes,#date,Date or time when something finished or will finish.
|
34
|
+
occurred,1.0,(5) Date attributes,#date,Date or time when something took place.
|
35
|
+
reported,1.0,(5) Date attributes,#date,Date or time when the information was reported.
|
36
|
+
start,1.0,(5) Date attributes,#date,Date or time when something started or will start.
|
37
|
+
abducted,1.1,(6) Impact attributes,#affected #inneed #reached #targeted,Hashtag refers to people who have been abducted.
|
38
|
+
displaced,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Displaced people or households. Refers to all types of displacement: use +idps or +refugees to be more specific.
|
39
|
+
idps,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Internally-displaced people or households. More specific than +displaced.
|
40
|
+
incamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Located in camps.
|
41
|
+
infected,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People infected with a disease.
|
42
|
+
injured,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People injured.
|
43
|
+
killed,1.0,(6) Impact attributes,#affected,People killed.
|
44
|
+
noncamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Not located in camps.
|
45
|
+
refugees,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Refugee people or households. More specific than +displaced.
|
46
|
+
email,1.0,(7) General attributes,#contact,Email address.
|
47
|
+
id,1.1,(7) General attributes,#meta,Use with #meta to provide internal identifiers for data records.
|
48
|
+
label,1.0,(7) General attributes,,Text labels (for a table or chart).
|
49
|
+
name,1.0,(7) General attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #contact #country #crisis #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"Human-readable name, title, or label."
|
50
|
+
num,1.0,(7) General attributes,#capacity #indicator #output,"The data consists of quantitative, numeric information."
|
51
|
+
phone,1.0,(7) General attributes,#contact,The data consists of #contact phone numbers.
|
52
|
+
source,1.0,(7) General attributes,#meta,Information source for the data in the row or record.
|
53
|
+
text,1.0,(7) General attributes,#indicator,"The data consists of qualitative, narrative textual information."
|
54
|
+
url,1.0,(7) General attributes,#activity #contact #meta #org #service,"The data consists of web links related to the main hashtag (e.g. for an #org, #service, #activity, #loc, etc)."
|
data/config/langs.csv
ADDED
data/config/tags.csv
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
tag,type,since,category,attributes,description
|
2
|
+
adm1,,1.0,(1) Places,+code +dest +name +origin,Top-level subnational administrative area (e.g. a governorate in Syria).
|
3
|
+
adm2,,1.0,(1) Places,+code +dest +name +origin,Second-level subnational administrative area (e.g. a subdivision in Bangladesh).
|
4
|
+
adm3,,1.0,(1) Places,+code +dest +name +origin,Third-level subnational administrative area (e.g. a subdistrict in Afghanistan).
|
5
|
+
adm4,,1.0,(1) Places,+code +dest +name +origin,Fourth-level subnational administrative area (e.g. a barangay in the Philippines).
|
6
|
+
adm5,,1.0,(1) Places,+code +dest +name +origin,Fifth-level subnational administrative area (e.g. a ward of a city).
|
7
|
+
country,,1.0,(1) Places,+code +dest +name +origin,Country (often left implied in a dataset). Also sometimes known as admin level 0.
|
8
|
+
geo,,1.0,(1) Places,+bounds +coord +elevation +lat +lon,"Geodetic geometry information (points, lines, shapes). Use for latitude and longitude, as well as bounds information."
|
9
|
+
loc,,1.0,(1) Places,+code +dest +name +origin +type,"Any general location, such as a village, camp, or clinic."
|
10
|
+
region,,1.0,(1) Places,+code +dest +name +origin,"A broad, supra- or cross-national geographical region (e.g. Sahel, Horn of Africa, Central Asia, Caribbean). Not to be confused with ""region"" used as the name of a subnational area (#adm1) in some countries."
|
11
|
+
affected,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +killed +m +noncamp +refugees,Number of people or households affected by an emergency. Subset of #population; superset of #inneed.
|
12
|
+
beneficiary,,1.0,(2) People and households,+code +name +type,"General (non-numeric) information about a person or group meant to benefit from aid activities, e.g. ""lactating women""."
|
13
|
+
inneed,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households in need of humanitarian assistance. Subset of #affected; superset of #targeted.
|
14
|
+
population,number,1.0,(2) People and households,+adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +m +noncamp +refugees,"General population number for an area or location, regardless of their specific humanitarian needs."
|
15
|
+
reached,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households reached with humanitarian assistance. Subset of #targeted.
|
16
|
+
respondee,,1.1,(2) People and households,,"Descriptive information, such as name, identifier, or traits, for a single respondee (person, household, etc.) in survey-style data."
|
17
|
+
targeted,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households targeted for humanitarian assistance. Subset of #inneed; superset of #reached.
|
18
|
+
access,,1.1,(3) Responses and other operations,+type,"Accessibility and constraints on access to a market, distribution point, facility, etc."
|
19
|
+
activity,,1.0,(3) Responses and other operations,+activity +code +name +programme +project +type +url,"A programme, project, or other activity. This hashtag applies to all levels; use the attributes +activity, +project, or +programme to distinguish different hierarchical levels."
|
20
|
+
capacity,,1.0,(3) Responses and other operations,+num +type,"The response capacity of the entity being described (e.g. ""25 beds"")."
|
21
|
+
contact,,1.0,(3) Responses and other operations,+email +name +phone +type +url,Contact information for the subject of a data record (e.g. an activity).
|
22
|
+
frequency,,1.1,(3) Responses and other operations,+type,The frequency with which something occurs.
|
23
|
+
indicator,,1.0,(3) Responses and other operations,+code +name +num +text +type,"A general hashtag for an indicator being tracked. See also #output, #capacity, #need, #impact, #severity, #affected, #inneed, #targeted, and #reached for more-specific indicator-related hashtags."
|
24
|
+
item,,1.1,(3) Responses and other operations,+code +name +provided +type +used,"Physical things provided, stored, shipped, available, used, etc."
|
25
|
+
need,,1.1,(3) Responses and other operations,+code +name +type,"A(n) (unfulfilled) need for an affected person, household, group, or population."
|
26
|
+
operations,,1.0,(3) Responses and other operations,+type,"Information that affects humanitarian operations, such as a restriction on movement or road closure."
|
27
|
+
org,,1.0,(3) Responses and other operations,+code +funder +impl +name +prog +type +url,"An organisation contributing to a humanitarian emergency response, e.g. a local government, community-based organisation, NGO, agency, donor, or law-enforcement or military unit. Use #group for organisations that are not part of the emergency response (e.g. a paramilitary group)."
|
28
|
+
output,,1.0,(3) Responses and other operations,+code +name +num +type,"An output indicator (e.g. ""number of water-purification kits distributed""). A more-specific alternative to #indicator, especially for 3W-style activity reports."
|
29
|
+
sector,,1.0,(3) Responses and other operations,+cluster +code +name +type,A humanitarian cluster or sector.
|
30
|
+
service,,1.1,(3) Responses and other operations,+code +name +provided +type +url +used,"A service used or needed by an affected person, household, group, or population."
|
31
|
+
subsector,,1.0,(3) Responses and other operations,+code +name +type,A humanitarian subsector.
|
32
|
+
channel,,1.1,(4) Cash and finance,+code +name,The detailed method of delivering aid (e.g. smartcard vs mobile transfer). More specific than #modality.
|
33
|
+
currency,,1.1,(4) Cash and finance,+code,"Name or ISO 4217 currency code for all financial #value cells in the row (e.g. ""EUR""). Typically used together with #value in financial or cash data."
|
34
|
+
modality,,1.1,(4) Cash and finance,+code +name,"The means by which an aid activity is accomplished. For cash transfers, values might include ""cash"", ""vouchers"", ""in-kind"", etc. May also be used for other types of modalities in other contexts."
|
35
|
+
value,number,1.1,(4) Cash and finance,,"A monetary value, such as the price of goods in a market, a project budget, or the amount of cash transferred to beneficiaries. May be used together with #currency in financial or cash data."
|
36
|
+
cause,,1.0,"(5) Crises, incidents, and events",+code +name +type,"The cause of an event, crisis, etc."
|
37
|
+
crisis,,1.0,"(5) Crises, incidents, and events",+code +name +type,A humanitarian emergency.
|
38
|
+
event,,1.0,"(5) Crises, incidents, and events",+code +name +type,"An individual event or incident within a crisis/emergency, such as a (localised) flood, bridge collapse, or conflict."
|
39
|
+
group,,1.0,"(5) Crises, incidents, and events",+code +name +type,"A non-humanitarian group (of any type) related to humanitarian crisis (e.g., a paramilitary group) Use #org instead for a humanitarian organisation such as an NGO, contributing to the humanitarian response."
|
40
|
+
impact,,1.0,"(5) Crises, incidents, and events",+code +name +type,The impact of a crisis on a group or other entity.
|
41
|
+
severity,,1.0,"(5) Crises, incidents, and events",+code +name +type,Severity of the crisis or event.
|
42
|
+
date,date,1.0,(6) Metadata,+approved +canceled +converted +end +occurred +reported +start,"Date related to the data in the record applies. Preferred format is ISO 8601 (e.g. ""2015-06-01"", ""2015-Q1"", etc.)"
|
43
|
+
description,,1.0,(6) Metadata,+type,Long description for a data record.
|
44
|
+
meta,,1.0,(6) Metadata,+id +source +url,Metadata about a row.
|
45
|
+
status,,1.0,(6) Metadata,+code +name,"Project/activity status description (such as ""planned"", ""active"", ""canceled"", or ""complete"")."
|
data/config/types.csv
ADDED
data/config/versions.csv
ADDED
data/lib/csvhuman/base.rb
CHANGED
data/lib/csvhuman/converter.rb
CHANGED
@@ -18,4 +18,113 @@ HEADER_CONVERTERS = {
|
|
18
18
|
gsub( /[^\w]+/, '' ).to_sym }
|
19
19
|
}
|
20
20
|
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
## Guess the Ruby class used for the values of a column, from the
## hashtag name and its attributes.
##
## name       - hashtag name without the leading '#' (e.g. 'date', 'affected')
## attributes - attribute names without the leading '+' (e.g. ['num']);
##              may be nil or empty
##
## Returns Integer, Float, Date or String (the fallback).
def self.guess_type( name, attributes )
  ## treat "no attributes given" (nil) like an empty list in every branch,
  ##  so that e.g. guess_type( 'date', nil ) no longer raises NoMethodError
  attributes ||= []

  ## true if at least one of the candidates is among the attributes
  tagged = ->(*candidates) { candidates.any? { |candidate| attributes.include?( candidate ) } }

  if name == 'date'
    ## just the year (e.g. 2011)? use an integer number
    tagged.( 'year' ) ? Integer : Date
  ## todo/fix: add more well-known names with num required!!!
  elsif %w[affected inneed targeted reached population].include?( name )
    Integer
  elsif attributes.empty?
    String     ## assume (default to) string
  elsif tagged.( 'num', 'id' )   ## assume id is (always) a rowid - why? why not?
    Integer
  elsif tagged.( 'date' )        ## todo/check: exists +date?
    Date
  elsif name == 'geo' && tagged.( 'lat', 'lon', 'elevation' )
    Float
  elsif tagged.( 'killed', 'injured', 'infected', 'displaced',
                 'idps', 'refugees', 'abducted', 'threatened',
                 'affected', 'inneed', 'targeted', 'reached' )
    Integer
  else
    String     ## assume (default to) string
  end
end
|
65
|
+
|
66
|
+
|
67
|
+
## convert guess_type to proc (is there a better/idomatic way)?
|
68
|
+
# ->(name, attributes) { guess_type( name, attributes ) }
|
69
|
+
## TYPE_MAPPING_GUESS = Kernel.method( :guess_type )
|
70
|
+
|
71
|
+
## Named type-mapping strategies; every entry is a lambda taking the
##  hashtag name and its attributes and returning a Ruby class.
TYPE_MAPPINGS = {
  ## keep values as is (that is, always map to String)
  none:  lambda { |_name, _attributes| String },
  ## derive the class from the hashtag name and attributes
  guess: lambda { |name, attributes| guess_type( name, attributes ) },
}

## :default and :all are aliases that share the very same lambda as :guess
%i[default all].each do |alias_name|
  TYPE_MAPPINGS[ alias_name ] = TYPE_MAPPINGS[ :guess ]
end
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
## Convert a cell value (string) to an Integer.
##
## Returns nil for nil, empty or whitespace-only values.
## Raises ArgumentError if the value is not a valid decimal integer.
def self.convert_to_i( value )
  return nil if value.nil?

  text = value.strip
  return nil if text.empty?     ## blank cell - no value

  ## note: always parse as base 10; without the explicit base
  ##   Integer() honours radix prefixes, that is, a zero-padded cell
  ##   such as "010" turns into 8 (octal) and "08" raises an error
  Integer( text, 10 )
end
|
92
|
+
|
93
|
+
## Convert a cell value (string) to a Float.
##
## Returns nil for nil, empty or whitespace-only values.
## Accepts the special values NaN, Inf / Infinity (with optional sign) in
##  any letter case. Raises ArgumentError for anything else that is not
##  a valid number.
def self.convert_to_f( value )
  return nil if value.nil?

  text = value.strip
  return nil if text.empty?     ## blank cell - no value

  ## note: Float() itself rejects NaN and Inf - map them by hand
  case text
  when /\A\+?inf(inity)?\z/i then  Float::INFINITY
  when /\A-inf(inity)?\z/i   then -Float::INFINITY
  when /\Anan\z/i            then  Float::NAN
  else
    ## todo: how to deal with conversion errors (throw exception? ignore? why? why not?)
    Float( text )
  end
end
|
102
|
+
|
103
|
+
## Convert a cell value (string) to a Date.
##
## Supported formats (the value must start with the date; anything after
##  it, e.g. a time of day, is ignored):
##   - year-month-day  e.g. 2014-11-09 or 2014-1-9
##   - day/month/year  e.g. 09/11/2014
##
## Returns nil for nil, empty or whitespace-only values and for values
##  in an unknown format. Raises ArgumentError (Date::Error in newer
##  rubies) if the format matches but the date does not exist, e.g. 2014-13-45.
def self.convert_to_date( value )
  return nil if value.nil?

  text = value.strip
  return nil if text.empty?     ## blank cell - no value

  ## note: patterns are anchored at the start and only the matched
  ##   date is passed on to strptime; an unanchored check lets values that
  ##   merely contain a date somewhere (e.g. "as of 2014-11-09") through
  ##   and strptime then raises instead of falling back to nil
  ## todo/fix: add support for more formats
  if (m = /\A(\d{4}-\d{1,2}-\d{1,2})(?!\d)/.match( text ))
    Date.strptime( m[1], '%Y-%m-%d' )     # 2014-11-09
  elsif (m = /\A(\d{1,2}\/\d{1,2}\/\d{4})(?!\d)/.match( text ))
    Date.strptime( m[1], '%d/%m/%Y' )     # 09/11/2014
  else
    ## todo/fix: throw argument/value error - why? why not
    nil
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
## Value converters keyed by target class; every entry is a lambda taking
##  the raw cell value. Classes without an entry have no converter.
TYPE_CONVERTERS = {
  Integer => lambda { |raw| convert_to_i( raw ) },
  Float   => lambda { |raw| convert_to_f( raw ) },
  Date    => lambda { |raw| convert_to_date( raw ) },
}
|
127
|
+
|
128
|
+
|
129
|
+
|
21
130
|
end # class CsvHuman
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvHuman
## Line matchers for reading the plain-text tag and attribute listings.
##  Every match_xxx helper prints what it found and returns the
##  MatchData, or a falsy value if the line does not match.
module DocHelper


  ## line with a hashtag only e.g. "#adm1"
  ##  note: name is a letter followed by at least one more letter or digit
  HASHTAG_LINE_RX = /^
                      \s*
                      \#
                      (?<name>[a-z][a-z0-9]+)
                      \s*
                    $/x

  ## Returns the MatchData (with :name) or nil.
  def match_hashtag( line )
    if (m=HASHTAG_LINE_RX.match(line))
      puts "hashtag >#{m[:name]}<"
      m
    else
      nil
    end
  end


  ## line with an attribute only e.g. "+code"
  ##  note: attrib might be one letter only (e.g.) +m,+f, etc.
  ATTRIBUTE_LINE_RX = /^
                        \s*
                        \+
                        (?<name>[a-z][a-z0-9]*)
                        \s*
                      $/x

  ## Returns the MatchData (with :name) or false.
  def match_attribute( line )
    if (m=ATTRIBUTE_LINE_RX.match(line))
      puts "attrib >#{m[:name]}<"
      m
    else
      false
    end
  end


  ##
  ## numbered section heading
  ##  e.g.   1.1. Places
  ##         2.1. Sex- and-age disaggregation (SADD) attributes
  HEADING_LINE_RX = /^
                      \s*
                      (?<level1>[1-9])
                      \.
                      (?<level2>[1-9])
                      \.
                      \s+
                      (?<title>.+?)
                      \s*
                    $/x

  ## Returns the MatchData (with :level1, :level2, :title) or false.
  def match_heading( line )
    if (m=HEADING_LINE_RX.match(line))
      puts "heading #{m[:level1]}.#{m[:level2]}. (#{m[:level2]}) >#{m[:title]}<"
      m
    else
      false
    end
  end


  ## note: the trailing period is a literal and optional (as in SINCE_HXL_RX);
  ##   with a bare "." (any character) a sentence without a closing period
  ##   loses the last letter of the type (e.g. "numbe" instead of "number")
  TYPE_RX = /Every value must be a (?<type>[a-z]+)\.?/

  ## Returns the MatchData (with :type) or false.
  def match_type( line )
    if (m=TYPE_RX.match(line))
      puts "type: >#{m[:type]}<"
      m
    else
      false
    end
  end


  SINCE_HXL_RX = /Since HXL (?<version>[1]\.[0-9])\.?/

  ## Returns the MatchData (with :version) or false.
  def match_since_hxl( line )
    if (m=SINCE_HXL_RX.match(line))
      puts "version: >#{m[:version]}<"
      m
    else
      false
    end
  end


  ## Split a description line into the text and the version.
  ##
  ## Returns [text, version]; the "Since HXL x.y" note gets removed from
  ##  the text. If the line has no such note the version is '?' and the
  ##  text is the line unchanged.
  def split_descr( line )
    if (m=match_since_hxl( line ))
      version = m[:version]
      ## remove "Since HXL 1.0" from text
      text    = line.gsub( SINCE_HXL_RX, '' ).strip
    else
      version = '?'
      text    = line
    end
    [text,version]
  end


end # module DocHelper
end # class CsvHuman
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvHuman
## Reader / converter (txt to csv) for the tags and attributes (schema)
##  listings. The input is read line by line:
##   - blank lines and comment lines (starting with %) are skipped
##   - numbered headings (e.g. "1.1. Places") open a new category
##   - the first non-blank line after the start of a record is its description
class Doc
  include DocHelper


  ## Read the attributes listing in path; returns the rows of parse_attributes.
  def self.read_attributes( path )
    self.open( path ) { |doc| doc.parse_attributes }
  end

  ## Read the tags listing in path; returns the rows of parse_tags.
  def self.read_tags( path )
    self.open( path ) { |doc| doc.parse_tags }
  end

  ## Open the file in path and wrap it in a Doc.
  ##
  ## With a block the doc gets passed to the block, the file is closed
  ##  afterwards (always) and the block's result is returned.
  ## Without a block the doc is returned and the file stays open.
  def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?
    ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
    f   = File.open( path, mode || 'r:bom|utf-8' )
    doc = self.new( f )

    ## note: caller responsible for closing (todo/fix: add close,closed? to doc!!!)
    return doc   unless block_given?

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    begin
      block.call( doc )
    ensure
      f.close
    end
  end # method self.open



  def initialize( str_or_readable )
    # note: must (only) support/respond_to each_line
    @input = str_or_readable
  end


  ## Parse the attributes listing.
  ##
  ## Returns an array of rows:
  ##   [attribute, version, category, tags (space-separated, with #), description]
  def parse_attributes
    attrib   = nil
    category = nil    ## category of the attribute being collected
    heading  = nil    ## latest heading seen (category for the attributes to come)
    descr    = nil
    version  = nil
    tags     = []

    next_line = nil   ## e.g. set to :descr

    attribs  = []

    @input.each_line do |line|
      line = line.strip    ## remove leading and trailing spaces (incl. newline)

      next if line.empty? || line.start_with?( '%' )   ## skip blank lines and comment lines

      if next_line == :descr
        ## auto-capture the line following the attribute as description
        descr, version = split_descr( line )
        puts "descr >#{descr}<, version >#{version}<"

        next_line = nil
      elsif (m=match_heading( line ))
        ## note: only remember the heading here; the attribute being
        ##   collected gets added later (when the next attribute starts)
        ##   and must keep the category it is listed under - otherwise the
        ##   last attribute of every section ends up in the next section
        heading = "(#{m[:level2]}) #{m[:title]}"
      elsif (m=match_attribute( line ))
        attribs << [attrib, version, category, tags.join( ' ' ), descr]   if attrib

        attrib    = m[:name]
        category  = heading
        tags      = []
        next_line = :descr    ## will auto-capture next line
      elsif (m=match_hashtag( line ))
        tags << "##{m[:name]}"
      end
    end

    attribs << [attrib, version, category, tags.join( ' ' ), descr]   if attrib

    attribs
  end # method parse_attributes



  ## Parse the tags listing.
  ##
  ## Returns an array of rows:
  ##   [tag, type, version, category, attributes (space-separated, with +), description]
  def parse_tags
    tag      = nil
    type     = nil
    category = nil    ## category of the tag being collected
    heading  = nil    ## latest heading seen (category for the tags to come)
    descr    = nil
    version  = nil
    attribs  = []

    next_line = nil   ## e.g. set to :descr

    tags     = []

    @input.each_line do |line|
      line = line.strip    ## remove leading and trailing spaces (incl. newline)

      next if line.empty? || line.start_with?( '%' )   ## skip blank lines and comment lines

      if next_line == :descr
        ## auto-capture the line following the tag as description
        descr, version = split_descr( line )

        ## descr = "(2) People and households"  if descr == "(2) Surveys and assessments"

        puts "descr >#{descr}<, version >#{version}<"

        next_line = nil
      elsif (m=match_heading( line ))
        ## note: only remember the heading here; the tag being collected
        ##   gets added later (when the next tag starts) and must keep the
        ##   category it is listed under - otherwise the last tag of
        ##   every section ends up in the next section
        heading = "(#{m[:level2]}) #{m[:title]}"
      elsif (m=match_type( line ))
        type = m[:type]
      elsif (m=match_hashtag( line ))
        tags << [tag, type, version, category, attribs.join( ' ' ), descr]   if tag

        tag       = m[:name]
        category  = heading
        attribs   = []
        type      = nil
        next_line = :descr    ## will auto-capture next line
      elsif (m=match_attribute( line ))
        attribs << "+#{m[:name]}"
      end
    end

    tags << [tag, type, version, category, attribs.join( ' ' ), descr]   if tag

    tags
  end # method parse_tags

end # class Doc
end # class CsvHuman
|