csvhuman 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +18 -0
- data/config/attributes.csv +54 -0
- data/config/langs.csv +14 -0
- data/config/tags.csv +45 -0
- data/config/types.csv +7 -0
- data/config/versions.csv +4 -0
- data/lib/csvhuman/base.rb +2 -1
- data/lib/csvhuman/converter.rb +109 -0
- data/lib/csvhuman/doc/helper.rb +108 -0
- data/lib/csvhuman/doc/schema.rb +151 -0
- data/lib/csvhuman/tag.rb +49 -86
- data/lib/csvhuman/version.rb +2 -2
- data/test/data/airports.csv +6 -0
- data/test/data/ebola.csv +76 -0
- data/test/data/hdx/ebola_treatment_centres.csv +94 -0
- data/test/data/hdx/phl_haima_houses_damaged.csv +165 -0
- data/test/data/hdx/zika_cases.csv +303 -0
- data/test/data/unhcr.csv +85 -0
- data/test/test_doc.rb +129 -0
- data/test/test_hdx.rb +28 -0
- data/test/test_misc.rb +28 -0
- data/test/test_type_converters.rb +42 -0
- data/test/test_type_mappings.rb +57 -0
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eac10294bb4add6317df3fd1eb2dcab35f75315d
|
4
|
+
data.tar.gz: 2072e628e65873a95584fa70fec0bf11a66e2322
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 931ada0926ae767b7b2348c237694ec35222ff06dfdbac7298527744c0ab99444778d5031355b106ed148551646e301c12d2cc32df8b0dd7001e2b91003c259f
|
7
|
+
data.tar.gz: 0ba952f578bfafb466a99073792d9c0173b24bf689571ec140ed8e61a7ef00f7addea40979bb8ce495d1a8a1266ce01845f5e551653f02afb15c0ea516021bf3
|
data/Manifest.txt
CHANGED
@@ -2,20 +2,38 @@ HISTORY.md
|
|
2
2
|
Manifest.txt
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
|
+
config/attributes.csv
|
6
|
+
config/langs.csv
|
7
|
+
config/tags.csv
|
8
|
+
config/types.csv
|
9
|
+
config/versions.csv
|
5
10
|
lib/csvhuman.rb
|
6
11
|
lib/csvhuman/base.rb
|
7
12
|
lib/csvhuman/column.rb
|
8
13
|
lib/csvhuman/converter.rb
|
14
|
+
lib/csvhuman/doc/helper.rb
|
15
|
+
lib/csvhuman/doc/schema.rb
|
9
16
|
lib/csvhuman/reader.rb
|
10
17
|
lib/csvhuman/tag.rb
|
11
18
|
lib/csvhuman/version.rb
|
19
|
+
test/data/airports.csv
|
20
|
+
test/data/ebola.csv
|
21
|
+
test/data/hdx/ebola_treatment_centres.csv
|
22
|
+
test/data/hdx/phl_haima_houses_damaged.csv
|
23
|
+
test/data/hdx/zika_cases.csv
|
12
24
|
test/data/sample1.csv
|
13
25
|
test/data/sample2.csv
|
14
26
|
test/data/sample3.csv
|
15
27
|
test/data/sample4.csv
|
16
28
|
test/data/test.csv
|
29
|
+
test/data/unhcr.csv
|
17
30
|
test/helper.rb
|
31
|
+
test/test_doc.rb
|
32
|
+
test/test_hdx.rb
|
18
33
|
test/test_header_converter.rb
|
34
|
+
test/test_misc.rb
|
19
35
|
test/test_reader.rb
|
20
36
|
test/test_samples.rb
|
21
37
|
test/test_tags.rb
|
38
|
+
test/test_type_converters.rb
|
39
|
+
test/test_type_mappings.rb
|
@@ -0,0 +1,54 @@
|
|
1
|
+
attribute,since,category,tags,description
|
2
|
+
adolescents,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adolescents, loosely defined (precise age range varies); may overlap +children and +adult. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adolescents +age12_17""."
|
3
|
+
adults,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Adults, loosely defined (precise age range varies); may overlap +adolescents and +elderly. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+adults +age18_64""."
|
4
|
+
children,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"The associated hashtag applies to non-adults, loosely defined (precise age range varies; may overlap +infants and +adolescents). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+children +age3_11""."
|
5
|
+
elderly,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Elderly people, loosely defined (precise age range varies). May overlap +adults. You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+elderly +age65plus""."
|
6
|
+
f,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Female people. See also +m and +i.
|
7
|
+
hh,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Households (vs +ind for individual people). The exact definition of ""household"" may vary among aid organisations."
|
8
|
+
i,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Intersex or non-gender-binary people. Use this attribute for any groups who do not identify as male (+m) or female (+f).
|
9
|
+
ind,1.1,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Individual people (vs +hh for households).
|
10
|
+
infants,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,"Infant children, loosely defined (precise age range varies; may overlap +children). You can optionally create custom attributes in addition to this to add precise age ranges, e.g. ""+infants +age0_2""."
|
11
|
+
m,1.0,(1) Sex- and-age disaggregation (SADD) attributes,#affected #inneed #population #reached #targeted,Male people. See also +f and +i.
|
12
|
+
activity,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as an ""activity"" proper (may imply different hierarchical levels in different contexts)."
|
13
|
+
cluster,1.1,(2) Organisation and activity attributes,#sector,Identifies a sector as a formal IASC humanitarian cluster.
|
14
|
+
funder,1.0,(2) Organisation and activity attributes,#org,Funding org/agency (e.g. donor).
|
15
|
+
impl,1.0,(2) Organisation and activity attributes,#org,Implementing partner.
|
16
|
+
prog,1.0,(2) Organisation and activity attributes,#org,Programming org/agency.
|
17
|
+
programme,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""programme"" (may imply different hierarchical levels in different contexts)."
|
18
|
+
project,1.1,(2) Organisation and activity attributes,#activity,"The implementers classify this activity as a ""project"" (may imply different hierarchical levels in different contexts)."
|
19
|
+
provided,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that has been provided to people in need."
|
20
|
+
used,1.1,(2) Organisation and activity attributes,#item #service,"Refers to a #service, #item, etc. that affected people have actually consumed or otherwise taken advantage of."
|
21
|
+
code,1.0,(3) Classification attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #country #crisis #currency #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"A unique, machine-readable code."
|
22
|
+
type,1.0,(3) Classification attributes,#access #activity #beneficiary #capacity #cause #contact #crisis #description #event #frequency #group #impact #indicator #item #loc #need #operations #org #output #sector #service #severity #subsector,"Types or categories. Use with #org, #loc, #indicator, etc to provide classification information."
|
23
|
+
bounds,1.0,(4) Geographical attributes,#geo,Boundary data (e.g. inline GeoJSON).
|
24
|
+
coord,1.0,(4) Geographical attributes,#geo,Geodetic coordinates (lat+lon together).
|
25
|
+
dest,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,Place of destination (intended or actual).
|
26
|
+
elevation,1.0,(4) Geographical attributes,#geo,Elevation above sea level (usually metres).
|
27
|
+
lat,1.0,(4) Geographical attributes,#geo,Latitude (decimal degrees preferred).
|
28
|
+
lon,1.0,(4) Geographical attributes,#geo,Longitude (decimal degrees preferred).
|
29
|
+
origin,1.1,(4) Geographical attributes,#adm1 #adm2 #adm3 #adm4 #adm5 #country #loc #region,"The data describes places of origin (intended or actual), e.g. the country of origin for displaced people."
|
30
|
+
approved,1.0,(5) Date attributes,#date,Date or time when something was approved.
|
31
|
+
canceled,1.0,(5) Date attributes,#date,Date or time when something (e.g. an #activity) was canceled.
|
32
|
+
converted,1.1,(5) Date attributes,#date,Date or time used for converting a monetary value to another currency.
|
33
|
+
end,1.0,(5) Date attributes,#date,Date or time when something finished or will finish.
|
34
|
+
occurred,1.0,(5) Date attributes,#date,Date or time when something took place.
|
35
|
+
reported,1.0,(5) Date attributes,#date,Date or time when the information was reported.
|
36
|
+
start,1.0,(5) Date attributes,#date,Date or time when something started or will start.
|
37
|
+
abducted,1.1,(6) Impact attributes,#affected #inneed #reached #targeted,Hashtag refers to people who have been abducted.
|
38
|
+
displaced,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Displaced people or households. Refers to all types of displacement: use +idps or +refugees to be more specific.
|
39
|
+
idps,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Internally-displaced people or households. More specific than +displaced.
|
40
|
+
incamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Located in camps.
|
41
|
+
infected,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People infected with a disease.
|
42
|
+
injured,1.0,(6) Impact attributes,#affected #inneed #reached #targeted,People injured.
|
43
|
+
killed,1.0,(6) Impact attributes,#affected,People killed.
|
44
|
+
noncamp,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Not located in camps.
|
45
|
+
refugees,1.0,(6) Impact attributes,#affected #inneed #population #reached #targeted,Refugee people or households. More specific than +displaced.
|
46
|
+
email,1.0,(7) General attributes,#contact,Email address.
|
47
|
+
id,1.1,(7) General attributes,#meta,Use with #meta to provide internal identifiers for data records.
|
48
|
+
label,1.0,(7) General attributes,,Text labels (for a table or chart).
|
49
|
+
name,1.0,(7) General attributes,#activity #adm1 #adm2 #adm3 #adm4 #adm5 #beneficiary #cause #channel #contact #country #crisis #event #group #impact #indicator #item #loc #modality #need #org #output #region #sector #service #severity #status #subsector,"Human-readable name, title, or label."
|
50
|
+
num,1.0,(7) General attributes,#capacity #indicator #output,"The data consists of quantitative, numeric information."
|
51
|
+
phone,1.0,(7) General attributes,#contact,The data consists of #contact phone numbers.
|
52
|
+
source,1.0,(7) General attributes,#meta,Information source for the data in the row or record.
|
53
|
+
text,1.0,(7) General attributes,#indicator,"The data consists of qualitative, narrative textual information."
|
54
|
+
url,1.0,(7) General attributes,#activity #contact #meta #org #service,"The data consists of web links related to the main hashtag (e.g. for an #org, #service, #activity, #loc, etc)."
|
data/config/langs.csv
ADDED
data/config/tags.csv
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
tag,type,since,category,attributes,description
|
2
|
+
adm1,,1.0,(1) Places,+code +dest +name +origin,Top-level subnational administrative area (e.g. a governorate in Syria).
|
3
|
+
adm2,,1.0,(1) Places,+code +dest +name +origin,Second-level subnational administrative area (e.g. a subdivision in Bangladesh).
|
4
|
+
adm3,,1.0,(1) Places,+code +dest +name +origin,Third-level subnational administrative area (e.g. a subdistrict in Afghanistan).
|
5
|
+
adm4,,1.0,(1) Places,+code +dest +name +origin,Fourth-level subnational administrative area (e.g. a barangay in the Philippines).
|
6
|
+
adm5,,1.0,(1) Places,+code +dest +name +origin,Fifth-level subnational administrative area (e.g. a ward of a city).
|
7
|
+
country,,1.0,(1) Places,+code +dest +name +origin,Country (often left implied in a dataset). Also sometimes known as admin level 0.
|
8
|
+
geo,,1.0,(1) Places,+bounds +coord +elevation +lat +lon,"Geodetic geometry information (points, lines, shapes). Use for latitude and longitude, as well as bounds information."
|
9
|
+
loc,,1.0,(1) Places,+code +dest +name +origin +type,"Any general location, such as a village, camp, or clinic."
|
10
|
+
region,,1.0,(1) Places,+code +dest +name +origin,"A broad, supra- or cross-national geographical region (e.g. Sahel, Horn of Africa, Central Asia, Caribbean). Not to be confused with ""region"" used as the name of a subnational area (#adm1) in some countries."
|
11
|
+
affected,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +killed +m +noncamp +refugees,Number of people or households affected by an emergency. Subset of #population; superset of #inneed.
|
12
|
+
beneficiary,,1.0,(2) People and households,+code +name +type,"General (non-numeric) information about a person or group meant to benefit from aid activities, e.g. ""lactating women""."
|
13
|
+
inneed,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households in need of humanitarian assistance. Subset of #affected; superset of #targeted.
|
14
|
+
population,number,1.0,(2) People and households,+adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +m +noncamp +refugees,"General population number for an area or location, regardless of their specific humanitarian needs."
|
15
|
+
reached,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households reached with humanitarian assistance. Subset of #targeted.
|
16
|
+
respondee,,1.1,(2) People and households,,"Descriptive information, such as name, identifier, or traits, for a single respondee (person, household, etc.) in survey-style data."
|
17
|
+
targeted,number,1.0,(2) People and households,+abducted +adolescents +adults +children +displaced +elderly +f +hh +i +idps +incamp +ind +infants +infected +injured +m +noncamp +refugees,Number of people or households targeted for humanitarian assistance. Subset of #inneed; superset of #reached.
|
18
|
+
access,,1.1,(3) Responses and other operations,+type,"Accessibility and constraints on access to a market, distribution point, facility, etc."
|
19
|
+
activity,,1.0,(3) Responses and other operations,+activity +code +name +programme +project +type +url,"A programme, project, or other activity. This hashtag applies to all levels; use the attributes +activity, +project, or +programme to distinguish different hierarchical levels."
|
20
|
+
capacity,,1.0,(3) Responses and other operations,+num +type,"The response capacity of the entity being described (e.g. ""25 beds"")."
|
21
|
+
contact,,1.0,(3) Responses and other operations,+email +name +phone +type +url,Contact information for the subject of a data record (e.g. an activity).
|
22
|
+
frequency,,1.1,(3) Responses and other operations,+type,The frequency with which something occurs.
|
23
|
+
indicator,,1.0,(3) Responses and other operations,+code +name +num +text +type,"A general hashtag for an indicator being tracked. See also #output, #capacity, #need, #impact, #severity, #affected, #inneed, #targeted, and #reached for more-specific indicator-related hashtags."
|
24
|
+
item,,1.1,(3) Responses and other operations,+code +name +provided +type +used,"Physical things provided, stored, shipped, available, used, etc."
|
25
|
+
need,,1.1,(3) Responses and other operations,+code +name +type,"A(n) (unfulfilled) need for an affected person, household, group, or population."
|
26
|
+
operations,,1.0,(3) Responses and other operations,+type,"Information that affects humanitarian operations, such as a restriction on movement or road closure."
|
27
|
+
org,,1.0,(3) Responses and other operations,+code +funder +impl +name +prog +type +url,"An organisation contributing to a humanitarian emergency response, e.g. a local government, community-based organisation, NGO, agency, donor, or law-enforcement or military unit. Use #group for organisations that are not part of the emergency response (e.g. a paramilitary group)."
|
28
|
+
output,,1.0,(3) Responses and other operations,+code +name +num +type,"An output indicator (e.g. ""number of water-purification kits distributed""). A more-specific alternative to #indicator, especially for 3W-style activity reports."
|
29
|
+
sector,,1.0,(3) Responses and other operations,+cluster +code +name +type,A humanitarian cluster or sector.
|
30
|
+
service,,1.1,(3) Responses and other operations,+code +name +provided +type +url +used,"A service used or needed by an affected person, household, group, or population."
|
31
|
+
subsector,,1.0,(3) Responses and other operations,+code +name +type,A humanitarian subsector.
|
32
|
+
channel,,1.1,(4) Cash and finance,+code +name,The detailed method of delivering aid (e.g. smartcard vs mobile transfer). More specific than #modality.
|
33
|
+
currency,,1.1,(4) Cash and finance,+code,"Name or ISO 4217 currency code for all financial #value cells in the row (e.g. ""EUR""). Typically used together with #value in financial or cash data."
|
34
|
+
modality,,1.1,(4) Cash and finance,+code +name,"The means by which an aid activity is accomplished. For cash transfers, values might include ""cash"", ""vouchers"", ""in-kind"", etc. May also be used for other types of modalities in other contexts."
|
35
|
+
value,number,1.1,(4) Cash and finance,,"A monetary value, such as the price of goods in a market, a project budget, or the amount of cash transferred to beneficiaries. May be used together with #currency in financial or cash data."
|
36
|
+
cause,,1.0,"(5) Crises, incidents, and events",+code +name +type,"The cause of an event, crisis, etc."
|
37
|
+
crisis,,1.0,"(5) Crises, incidents, and events",+code +name +type,A humanitarian emergency.
|
38
|
+
event,,1.0,"(5) Crises, incidents, and events",+code +name +type,"An individual event or incident within a crisis/emergency, such as a (localised) flood, bridge collapse, or conflict."
|
39
|
+
group,,1.0,"(5) Crises, incidents, and events",+code +name +type,"A non-humanitarian group (of any type) related to humanitarian crisis (e.g., a paramilitary group) Use #org instead for a humanitarian organisation such as an NGO, contributing to the humanitarian response."
|
40
|
+
impact,,1.0,"(5) Crises, incidents, and events",+code +name +type,The impact of a crisis on a group or other entity.
|
41
|
+
severity,,1.0,"(5) Crises, incidents, and events",+code +name +type,Severity of the crisis or event.
|
42
|
+
date,date,1.0,(6) Metadata,+approved +canceled +converted +end +occurred +reported +start,"Date related to the data in the record applies. Preferred format is ISO 8601 (e.g. ""2015-06-01"", ""2015-Q1"", etc.)"
|
43
|
+
description,,1.0,(6) Metadata,+type,Long description for a data record.
|
44
|
+
meta,,1.0,(6) Metadata,+id +source +url,Metadata about a row.
|
45
|
+
status,,1.0,(6) Metadata,+code +name,"Project/activity status description (such as ""planned"", ""active"", ""canceled"", or ""complete"")."
|
data/config/types.csv
ADDED
data/config/versions.csv
ADDED
data/lib/csvhuman/base.rb
CHANGED
data/lib/csvhuman/converter.rb
CHANGED
@@ -18,4 +18,113 @@ HEADER_CONVERTERS = {
|
|
18
18
|
gsub( /[^\w]+/, '' ).to_sym }
|
19
19
|
}
|
20
20
|
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
##
## Guess the Ruby type (class) of a column from its hashtag name and attributes.
##
##   name       - hashtag name as compared below (e.g. 'date', 'affected', 'geo')
##   attributes - collection of attribute names (e.g. ['num']); may be nil or empty
##
## Returns Integer, Float, Date or String (String is the default).
def self.guess_type( name, attributes )
  ## treat missing attributes like "no attributes" so that every branch
  ##  (including the 'date' one) can safely call include?
  attributes ||= []

  ## (1) well-known hashtag names win over attributes
  if name == 'date'
    ## just the year (e.g. 2011); use an integer number
    return attributes.include?( 'year' ) ? Integer : Date
  end

  ## todo/fix: add more well-known names with num required!!!
  return Integer if ['affected', 'inneed', 'targeted', 'reached', 'population'].include?( name )

  ## (2) otherwise check the attributes
  if attributes.empty?
    String     ## assume (default to) string
  elsif attributes.include?( 'num' ) ||
        attributes.include?( 'id' )    ## assume id is (always) a rowid - why? why not?
    Integer
  elsif attributes.include?( 'date' )  ### todo/check: exists +date?
    Date
  elsif name == 'geo' &&
        ['lat', 'lon', 'elevation'].any? { |attr| attributes.include?( attr ) }
    Float
  elsif ['killed', 'injured', 'infected', 'displaced',
         'idps', 'refugees', 'abducted', 'threatened',
         'affected', 'inneed', 'targeted', 'reached'].any? { |attr| attributes.include?( attr ) }
    Integer    ## people counts
  else
    String     ## assume (default to) string
  end
end
|
65
|
+
|
66
|
+
|
67
|
+
## convert guess_type to proc (is there a better/idiomatic way)?
|
68
|
+
# ->(name, attributes) { guess_type( name, attributes ) }
|
69
|
+
## TYPE_MAPPING_GUESS = Kernel.method( :guess_type )
|
70
|
+
|
71
|
+
## built-in type mapping strategies; every entry is a lambda called with
##  (name, attributes) that answers with a type (class)
TYPE_MAPPINGS = {
  ## keep every value as is (that is, always answer with String)
  none:  ->(name, attributes) { String },
  ## delegate to guess_type
  guess: ->(name, attributes) { guess_type( name, attributes ) },
}

## register the aliases - :default and :all share the very same lambda as :guess
[:default, :all].each do |alias_name|
  TYPE_MAPPINGS[ alias_name ] = TYPE_MAPPINGS[:guess]
end
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
##
## Convert a (string) cell value to an Integer.
##
## Returns nil for nil or empty values.
## Raises ArgumentError if the value is not a valid decimal integer.
def self.convert_to_i( value )
  return nil if value.nil? || value.empty?   ## return nil - why? why not?

  ## note: always parse as base 10 - without an explicit base Integer()
  ##   honors radix prefixes, that is, "010" turns into 8 (octal)
  ##   and "08" raises an ArgumentError
  Integer( value, 10 )
end
|
92
|
+
|
93
|
+
##
## Convert a (string) cell value to a Float.
##
## Returns nil for nil or empty values; otherwise hands the value to Float()
##  (which raises on values it cannot convert).
def self.convert_to_f( value )
  ## missing value? return nil - why? why not?
  return nil if value.nil? || value.empty?

  ## todo/fix: add support for NaN, Inf, -Inf etc.
  ##  how to deal with conversion errors (throw exception? ignore? NaN? why? why not?)
  Float( value )
end
|
102
|
+
|
103
|
+
##
## Convert a (string) cell value to a Date.
##
## Two layouts get recognized: year-month-day (e.g. 2014-11-09)
##  and day/month/year (e.g. 09/11/2014).
##
## Returns nil for nil, empty or unrecognized values.
def self.convert_to_date( value )
  return nil if value.nil? || value.empty?   ## return nil - why? why not?

  ## todo/fix: add support for more formats
  ##  how to deal with conversion errors (throw exception? ignore? why? why not?)
  format = if value =~ /\d{4}-\d{1,2}-\d{1,2}/      ### todo: check if 2014-1-9 works for strptime too (leading zero required)?
             "%Y-%m-%d"     # 2014-11-09
           elsif value =~ /\d{1,2}\/\d{1,2}\/\d{4}/
             "%d/%m/%Y"     # 09/11/2014
           end

  ## todo/fix: throw argument/value error if no format matched - why? why not
  format ? Date.strptime( value, format ) : nil
end
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
## lookup table: target type (class) => lambda converting a single cell value
TYPE_CONVERTERS = {
  Integer => ->(cell) { convert_to_i( cell ) },
  Float   => ->(cell) { convert_to_f( cell ) },
  Date    => ->(cell) { convert_to_date( cell ) },
}
|
127
|
+
|
128
|
+
|
129
|
+
|
21
130
|
end # class CsvHuman
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvHuman
module DocHelper
  ## Line matchers used when reading the tag and attribute dictionary text.
  ##  Every match_* method prints a short trace line via puts on success
  ##  and returns the MatchData (with named captures).


  ## a line holding nothing but a hashtag e.g. #adm1
  ##   note: the name requires at least two characters
  HASHTAG_LINE_RX= /^
                     \s*
                      \#
                      (?<name>[a-z][a-z0-9]+)
                     \s*
                    $/x

  ## Returns the MatchData (see :name) or nil if line is not a hashtag line.
  def match_hashtag( line )
    m = HASHTAG_LINE_RX.match( line )
    return nil  unless m

    puts "hashtag >#{m[:name]}<"
    m
  end


  ## a line holding nothing but an attribute e.g. +code
  ##   note: attrib might be one letter only (e.g.) +m,+f, etc.
  ATTRIBUTE_LINE_RX= /^
                       \s*
                        \+
                        (?<name>[a-z][a-z0-9]*)
                       \s*
                      $/x

  ## Returns the MatchData (see :name) or false if line is not an attribute line.
  def match_attribute( line )
    m = ATTRIBUTE_LINE_RX.match( line )
    return false  unless m

    puts "attrib >#{m[:name]}<"
    m
  end


  ##
  ## a numbered (two-level) heading line
  ##  e.g.  1.1. Places
  ##        2.1. Sex- and-age disaggregation (SADD) attributes
  HEADING_LINE_RX=/^
                    \s*
                     (?<level1>[1-9])
                       \.
                     (?<level2>[1-9])
                       \.
                       \s+
                     (?<title>.+?)
                    \s*
                  $/x

  ## Returns the MatchData (see :level1, :level2, :title) or false if line is not a heading line.
  def match_heading( line )
    m = HEADING_LINE_RX.match( line )
    return false  unless m

    puts "heading #{m[:level1]}.#{m[:level2]}. (#{m[:level2]}) >#{m[:title]}<"
    m
  end


  ## note: the sentence-ending dot is a literal (escaped) and optional dot;
  ##   an unescaped dot matches any character and - if nothing follows the type -
  ##   steals the last letter of the type (e.g. "numbe" instead of "number")
  TYPE_RX = /Every value must be a (?<type>[a-z]+)\.?/

  ## Returns the MatchData (see :type) or false if line holds no type sentence.
  def match_type( line )
    m = TYPE_RX.match( line )
    return false  unless m

    puts "type: >#{m[:type]}<"
    m
  end


  ## e.g. Since HXL 1.0  or  Since HXL 1.1.
  SINCE_HXL_RX = /Since HXL (?<version>[1]\.[0-9])\.?/

  ## Returns the MatchData (see :version) or false if line holds no "Since HXL" marker.
  def match_since_hxl( line )
    m = SINCE_HXL_RX.match( line )
    return false  unless m

    puts "version: >#{m[:version]}<"
    m
  end


  ## Split a description line into text and version.
  ##
  ## Returns a two-element array [text, version]; if the line holds a
  ##  "Since HXL x.y" marker the marker gets removed from the text (and the text stripped),
  ##  otherwise the line is returned as is together with '?' for the version.
  def split_descr( line )
    m = match_since_hxl( line )
    if m
      ## remove "Since HXL 1.0" from text
      [line.gsub( SINCE_HXL_RX, '' ).strip, m[:version]]
    else
      [line, '?']
    end
  end

end # module DocHelper
end # class CsvHuman
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class CsvHuman
class Doc    ## tags and attributes (schema) reader / converter (txt to csv)
  include DocHelper


  ## Open the text file at path and return the attribute records (see parse_attributes).
  def self.read_attributes( path )
    self.open( path ) { |doc| doc.parse_attributes }
  end

  ## Open the text file at path and return the tag records (see parse_tags).
  def self.read_tags( path )
    self.open( path ) { |doc| doc.parse_tags }
  end

  ## Open the file at path and wrap it in a Doc.
  ##
  ##   path  - file to open
  ##   mode  - mode passed on to File.open; defaults to 'r:bom|utf-8' if nil/not passed in
  ##   block - if given, gets called with the doc; the file is closed afterwards
  ##            (even if the block raises)
  ##
  ## Returns the value of the block or - without a block - the doc itself.
  def self.open( path, mode=nil, &block )   ## rename path to filename or name - why? why not?
    f   = File.open( path, mode || 'r:bom|utf-8' )
    doc = self.new( f )

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    if block_given?
      begin
        block.call( doc )
      ensure
        f.close
      end
    else
      doc    ## note: caller responsible for closing (todo/fix: add close,closed? to doc!!!)
    end
  end # method self.open



  def initialize( str_or_readable )
    # note: must (only) support/respond_to each_line
    @input = str_or_readable
  end


  ## Read all attribute records from the input.
  ##
  ## Blank lines and lines starting with % (comments) get skipped.
  ## An attribute line (e.g. +code) opens a new record; the next line is taken
  ##  as its description and all following hashtag lines (e.g. #org) get collected
  ##  until the next attribute line. Heading lines set the category for the
  ##  records opened after the heading.
  ##
  ## Returns an array of rows [attribute, version, category, tags, descr]
  ##  with the tags joined by a space.
  def parse_attributes
    attrib          = nil
    attrib_category = nil    ## category in effect when the current attrib was opened
    category        = nil    ## category of the latest heading
    descr           = nil
    version         = nil
    tags            = []

    next_line = nil     ## e.g. set to :descr

    attribs = []

    @input.each_line do |line|
      line = line.strip    ## remove leading and trailing spaces (incl. newlines)

      next if line.empty? || line.start_with?( '%' )   ## skip blank lines and comment lines

      if next_line == :descr
        ## auto-capture the line following an attribute line as description
        descr, version = split_descr( line )
        puts "descr >#{descr}<, version >#{version}<"

        next_line = nil
      elsif (m=match_heading( line ))
        category = "(#{m[:level2]}) #{m[:title]}"
      elsif (m=match_attribute( line ))
        attribs << [attrib, version, attrib_category, tags.join( ' ' ), descr]  if attrib

        attrib          = m[:name]
        ## note: remember the category now - the heading of the next section
        ##  shows up before the record gets added (with the next attribute)
        attrib_category = category
        tags            = []
        descr           = nil    ## do not carry over from the previous record
        version         = nil
        next_line       = :descr ## will auto-capture next line
      elsif (m=match_hashtag( line ))
        tags << "##{m[:name]}"
      end
    end

    ## add the last (pending) record
    attribs << [attrib, version, attrib_category, tags.join( ' ' ), descr]  if attrib

    attribs
  end # method parse_attributes



  ## Read all tag records from the input.
  ##
  ## Blank lines and lines starting with % (comments) get skipped.
  ## A hashtag line (e.g. #org) opens a new record; the next line is taken
  ##  as its description, a type sentence sets its type and all following
  ##  attribute lines (e.g. +code) get collected until the next hashtag line.
  ##  Heading lines set the category for the records opened after the heading.
  ##
  ## Returns an array of rows [tag, type, version, category, attributes, descr]
  ##  with the attributes joined by a space.
  def parse_tags
    tag          = nil
    tag_category = nil    ## category in effect when the current tag was opened
    type         = nil
    category     = nil    ## category of the latest heading
    descr        = nil
    version      = nil
    attribs      = []

    next_line = nil     ## e.g. set to :descr

    tags = []

    @input.each_line do |line|
      line = line.strip    ## remove leading and trailing spaces (incl. newlines)

      next if line.empty? || line.start_with?( '%' )   ## skip blank lines and comment lines

      if next_line == :descr
        ## auto-capture the line following a hashtag line as description
        descr, version = split_descr( line )

        ## descr = "(2) People and households"  if descr == "(2) Surveys and assessments"

        puts "descr >#{descr}<, version >#{version}<"

        next_line = nil
      elsif (m=match_heading( line ))
        category = "(#{m[:level2]}) #{m[:title]}"
      elsif (m=match_type( line ))
        type = m[:type]
      elsif (m=match_hashtag( line ))
        tags << [tag, type, version, tag_category, attribs.join( ' ' ), descr]  if tag

        tag          = m[:name]
        ## note: remember the category now - the heading of the next section
        ##  shows up before the record gets added (with the next hashtag)
        tag_category = category
        attribs      = []
        type         = nil
        descr        = nil    ## do not carry over from the previous record
        version      = nil
        next_line    = :descr ## will auto-capture next line
      elsif (m=match_attribute( line ))
        attribs << "+#{m[:name]}"
      end
    end

    ## add the last (pending) record
    tags << [tag, type, version, tag_category, attribs.join( ' ' ), descr]  if tag

    tags
  end # method parse_tags

end # class Doc
end # class CsvHuman
|