hsds_transformer 0.0.2
- checksums.yaml +7 -0
- data/lib/api.rb +52 -0
- data/lib/datapackage/datapackage.json +1579 -0
- data/lib/datapackage/open211_miami_datapackage.json +1007 -0
- data/lib/hsds_transformer/base_transformer.rb +182 -0
- data/lib/hsds_transformer/custom/ilao_transformer.rb +80 -0
- data/lib/hsds_transformer/custom/open211_miami_transformer.rb +168 -0
- data/lib/hsds_transformer/exceptions.rb +5 -0
- data/lib/hsds_transformer/file_paths.rb +40 -0
- data/lib/hsds_transformer/headers.rb +31 -0
- data/lib/hsds_transformer/runner.rb +32 -0
- data/lib/hsds_transformer.rb +15 -0
- data/lib/support.rb +31 -0
- metadata +229 -0
data/lib/hsds_transformer/base_transformer.rb
ADDED
@@ -0,0 +1,182 @@
module HsdsTransformer
  class BaseTransformer
    include HsdsTransformer::Headers
    include HsdsTransformer::FilePaths

    attr_reader :mapping, :include_custom

    SUPPORTED_HSDS_MODELS = %w(organizations services locations physical_addresses postal_addresses phones service_taxonomies regular_schedules taxonomies accessibility_for_disabilities contacts languages eligibilities services_at_locations service_areas)

    def self.run(args)
      new(args).transform
    end

    # TODO validate that incoming data is valid-ish, like unique IDs
    def initialize(args)
      @mapping = parse_mapping(args[:mapping])

      @include_custom = args[:include_custom]
      @zip_output = args[:zip_output]

      SUPPORTED_HSDS_MODELS.each do |model|
        var_name = "@" + model
        instance_variable_set(var_name, [])
      end

      set_file_paths(args)
    end

    def transform
      # Initial transformation into HSDS
      mapping.each do |input_file_name, file_mapping|
        transform_file(input_file_name, file_mapping)
      end

      # HSDS additional formatting
      singletonize_languages

      apply_custom_transformation

      # Make the datapackage and data directories for the output files
      Dir.mkdir(output_datapackage_path) unless Dir.exist?(output_datapackage_path)
      Dir.mkdir(output_data_path) unless Dir.exist?(output_data_path)

      # Write the data to CSV files
      write_output_files

      zip_output if @zip_output

      self
    end

    def transform_file(input_file_name, file_mapping)
      path = @input_path + input_file_name
      org_mapping = file_mapping["columns"]

      # Process each row in a way that allows it to create multiple objects,
      # including multiple objects of the same model from one row.
      CSV.foreach(path, headers: true) do |input|
        collected_data = hsds_objects_from_row(input, org_mapping)
        collect_into_ivars(collected_data)
      end
    end

    # This is overridden in a custom transformer if there is one
    def apply_custom_transformation
    end

    private

    def hsds_objects_from_row(input, org_mapping)
      collected_data = {}

      # k is the input field name;
      # org_mapping[k] gives us the array of output fields
      input.each do |k, v|
        # Wrap single mappings in an array to stay backwards compatible
        output_fields = org_mapping[k].is_a?(Array) ? org_mapping[k] : [org_mapping[k]]

        # Now let's collect each object
        output_fields.compact.each do |output_field|
          # Ensure a hash per model, e.g. collected_data = { "organizations" => {} }
          collected_data[output_field["model"]] ||= {}

          # Append all string fields marked as "append" to a single output field
          if output_field["append"]
            existing_string_value = collected_data[output_field["model"]][output_field["field"]] || ""
            existing_string_value += v.to_s unless null_type(v)

            collected_data[output_field["model"]].merge!(output_field["field"] => existing_string_value)
          else
            value = output_field["map"] ? output_field["map"][v] : v
            safe_val = null_type(value) ? nil : value
            collected_data[output_field["model"]].merge!(output_field["field"] => safe_val)
          end
        end
      end
      collected_data
    end

    def null_type(string)
      string.nil? || string.downcase.strip == "null"
    end

    # Pop each object into its respective instance variable collection so it
    # gets written to the right file
    def collect_into_ivars(collected_data)
      SUPPORTED_HSDS_MODELS.each do |model|
        collection_ivar(model) << collected_data[model] if collected_data[model] && !collected_data[model].empty?
      end
    end

    def collection_ivar(model)
      var_name = "@" + model
      instance_variable_get(var_name)
    end

    # Split comma-separated language values into one row per language
    def singletonize_languages
      formatted_langs = @languages.each_with_object([]) do |language_row, array|
        langs = language_row["language"].to_s.split(",")
        if langs.size > 1
          langs.each do |lang|
            array << language_row.clone.merge("language" => lang.strip)
          end
        else
          array << language_row
        end
      end
      @languages = formatted_langs
    end

    def write_output_files
      SUPPORTED_HSDS_MODELS.each do |model|
        path_var = instance_variable_get "@output_#{model}_path"
        write_csv path_var, headers(collection_ivar(model).first, model), collection_ivar(model)
      end
    end

    def zip_output
      input_data_files = Dir.glob(File.join(output_data_path, '**/*'))

      File.delete(zipfile_name) if File.exist?(zipfile_name)

      Zip::File.open(zipfile_name, Zip::File::CREATE) do |zipfile|
        # Add datapackage.json
        zipfile.add("datapackage.json", datapackage_json_path)

        # Add data files
        input_data_files.each do |file_path|
          zipped_name = "data/" + File.basename(file_path)
          zipfile.add(zipped_name, file_path)
        end
      end
    end

    # This also dedupes data by calling `uniq` on each collection before writing
    def write_csv(path, headers, data)
      return if data.empty?
      CSV.open(path, 'wb') do |csv|
        csv << headers
        data.uniq.each do |row|
          csv << CSV::Row.new(row.keys, row.values).values_at(*headers) unless row.values.all?(nil)
        end
      end
    end

    def parse_mapping(mapping_path)
      if mapping_path.start_with?("http")
        uri = URI(mapping_path)
        file = Net::HTTP.get(uri)
        YAML.load file
      else
        YAML.load File.read(mapping_path)
      end
    end
  end
end
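For orientation, here is a minimal sketch of driving BaseTransformer directly. The mapping path, input CSV name, and column names are hypothetical; the mapping structure mirrors what transform_file and hsds_objects_from_row read ("columns", "model", "field", plus the optional "append" and "map" keys).

require "hsds_transformer"

# mapping.yaml (hypothetical) keys each input CSV to its column rules:
#
#   agencies.csv:
#     columns:
#       Agency Name:
#         - model: organizations
#           field: name
#       Website:
#         - model: organizations
#           field: url
#
HsdsTransformer::BaseTransformer.run(
  mapping: "mapping.yaml",  # local path, or an http(s) URL fetched via Net::HTTP
  input_path: "input/",     # dir containing agencies.csv
  output_path: "out",       # datapackage/ and datapackage.zip are written here
  include_custom: false,
  zip_output: true
)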
data/lib/hsds_transformer/custom/ilao_transformer.rb
ADDED
@@ -0,0 +1,80 @@
module HsdsTransformer
  class IlaoTransformer < HsdsTransformer::BaseTransformer

    STATE_ABBREVIATIONS = %w(AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV WY)

    def apply_custom_transformation
      parse_address_data
      # process_regular_schedule_text
    end

    private

    def parse_address_data
      # TODO do this for physical addresses too
      @postal_addresses.each do |address_row|
        address_str = address_row["address_1"]
        postal_code = address_str.split(//).last(5).join
        postal_code = postal_code.match(/\d{5}/)

        # postal_code is a MatchData, or nil when no five-digit run was found
        if postal_code
          address_row["postal_code"] = postal_code.to_s
          address_str = address_str[0..-7]
        end

        state = address_str.split(//).last(2).join.upcase

        if STATE_ABBREVIATIONS.include?(state)
          address_row["state_province"] = state
          address_str = address_str[0..-5]
        end

        address_row["address_1"] = address_str
      end
    end

    def process_regular_schedule_text(schedule_key:, schedule_hash:, input:)
      return unless input["Hours of operation"]

      day_entries = input["Hours of operation"].scan(/\S*day: \S*/)
      day_entries.each do |entry|
        day, hours = entry.split(': ')
        if hours == "Closed"
          opens_at = nil
          closes_at = nil
        else
          opens_at, closes_at = hours.split('-')
        end
        collect_schedule_data(schedule_key: schedule_key,
                              schedule_hash: schedule_hash, input: input,
                              day: day, opens_at: opens_at, closes_at: closes_at)
      end
    end

    # NOTE: relies on a `schedule_data` collection being available in the including context
    def collect_schedule_data(schedule_key:, schedule_hash:, input:,
                              day:, opens_at:, closes_at:)
      schedule_row = {}
      schedule_row["weekday"] = day
      schedule_row["opens_at"] = opens_at
      schedule_row["closes_at"] = closes_at

      foreign_key = schedule_hash["foreign_key_name"]
      foreign_key_value = schedule_hash["foreign_key_value"]
      schedule_row[foreign_key] = input[foreign_key_value]
      schedule_data << schedule_row
    end

    # NOTE: relies on a `sal_data` collection being available in the including context
    def collect_sal_data(sal_key:, sal_hash:, input:)
      key = sal_hash["field"]
      sal_row = {}
      sal_row[key] = input[sal_key]

      foreign_key = sal_hash["foreign_key_name"]
      foreign_key_value = sal_hash["foreign_key_value"]
      sal_row[foreign_key] = input[foreign_key_value]
      sal_data << sal_row
    end
  end
end
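A worked example of the trailing-ZIP/state peeling that parse_address_data applies to each row's address_1; the address is invented, and the state check is shown with a stand-in for the STATE_ABBREVIATIONS lookup.

address = "120 S LaSalle St Chicago, IL 60603"

zip = address.split(//).last(5).join.match(/\d{5}/)  # => #<MatchData "60603">
address = address[0..-7] if zip                      # => "120 S LaSalle St Chicago, IL"

state = address.split(//).last(2).join.upcase        # => "IL"
address = address[0..-5] if state == "IL"            # => "120 S LaSalle St Chicago"

# The row ends up with postal_code "60603", state_province "IL",
# and the trimmed address_1.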
data/lib/hsds_transformer/custom/open211_miami_transformer.rb
ADDED
@@ -0,0 +1,168 @@
module HsdsTransformer
  class Open211MiamiTransformer < HsdsTransformer::BaseTransformer
    WEEKDAYS = %w(Monday Tuesday Wednesday Thursday Friday)
    ALL_DAYS = %w(Monday Tuesday Wednesday Thursday Friday Saturday Sunday)
    DAY_MAPPING = {
      "mon" => "Monday",
      "tue" => "Tuesday",
      "wed" => "Wednesday",
      "thu" => "Thursday",
      "fri" => "Friday",
      "sat" => "Saturday",
      "sun" => "Sunday",
    }

    TOP_LEVEL_TAXONOMIES = {
      "B" => "Basic Needs",
      "D" => "Consumer Services",
      "F" => "Criminal Justice and Legal Services",
      "H" => "Education",
      "J" => "Environmental Quality",
      "L" => "Health Care",
      "N" => "Income Support and Employment",
      "P" => "Individual and Family Life",
      "R" => "Mental Health Care and Counseling",
      "T" => "Organizational/Community/International Services",
      "Y" => "Target Populations"
    }

    TAXONOMY_VOCAB = "Open211 Miami - AIRS"

    def apply_custom_transformation
      remove_child_organizations
      determine_services
      parse_regular_schedules_text
      supplement_taxonomy
    end

    private

    def determine_services
      # Rows are mutated in place, so no reassignment is needed
      @services.each do |service|
        # Update the name to remove the org name
        formatted_name = service["name"].to_s.split(" - ").last
        service.merge!("name" => formatted_name)

        # Set the org ID as the parent provider ID
        unless service["parent_provider_id"].nil?
          service.merge!("organization_id" => service["parent_provider_id"])
        end
        service.delete "parent_provider_id"
      end
    end

    # TODO figure out what to do with 24-hour text
    # TODO add IDs
    def parse_regular_schedules_text
      new_schedules = @regular_schedules.each_with_object([]) do |sched_row, new_scheds|
        # Schedule times and tidbits are mostly separated by a newline
        sched_options = sched_row["original_text"].to_s.split("\n")

        sched_options.each do |opt|
          opt_days = find_days(opt)
          if all_weekdays?(opt_days)
            sched_days = WEEKDAYS
          elsif single_days?(opt_days)
            sched_days = single_days(opt_days)
          else
            sched_days = []
          end

          sched_days.each do |day|
            new_scheds << new_sched_row(day, opt, sched_row)
          end
        end
      end

      @regular_schedules = new_schedules
    end

    def find_days(opt_string)
      # [1..-1] returns nil for an empty array (e.g. a blank line), so fall back to []
      strings = opt_string.to_s.split(", ")[1..-1] || []
      strings.map(&:downcase)
    end

    def all_weekdays?(days)
      days == ["mon-fri"]
    end

    def single_days?(days)
      !single_days(days).empty?
    end

    def single_days(days)
      # Hash#select yields key and value; match on the abbreviation key
      DAY_MAPPING.select { |abbr, _full| days.include?(abbr) }.values
    end

    def hours(opt)
      range = opt.split(", ")[0]
      times = range.split("-")
      return unless times.size == 2

      open = clean_time(times[0])
      close = clean_time(times[1])

      [open, close]
    end

    # Finds the time in strings like "Admin:\\n9:00am", "9am", "9:0a", "10:00pm"
    def clean_time(time)
      /\d{1,2}.*\z/.match(time).to_s
    end

    def new_sched_row(day, opt, sched_row)
      open, close = hours(opt)
      {
        "service_id" => sched_row["service_id"],
        "weekday" => day,
        "opens_at" => open,
        "closes_at" => close,
        "original_text" => sched_row["original_text"]
      }
    end

    def remove_child_organizations
      @organizations.reject! do |org|
        !org["parent_provider_id"].nil?
      end

      @organizations.each { |org| org.delete("parent_provider_id") }
    end

    def supplement_taxonomy
      @taxonomies.each do |tax_row|
        # Single-character IDs are already top-level categories
        category = tax_row["id"].length == 1 ? nil : tax_row["id"][0]

        suppl_attrs = {
          "parent_id" => category,
          "parent_name" => TOP_LEVEL_TAXONOMIES[category],
          "vocabulary" => TAXONOMY_VOCAB
        }

        tax_row.merge!(suppl_attrs)
      end

      @taxonomies.concat(top_level_taxonomies)
    end

    def top_level_taxonomies
      TOP_LEVEL_TAXONOMIES.map do |key, value|
        {
          "id" => key,
          "name" => value,
          "taxonomy_facet" => "Service",
          "parent_id" => nil,
          "parent_name" => nil,
          "vocabulary" => TAXONOMY_VOCAB
        }
      end
    end
  end
end
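To make the schedule fan-out concrete, a sketch with an invented original_text; the service_id is hypothetical.

sched_row = {
  "service_id"    => "svc-1",
  "original_text" => "9:00am-5:00pm, mon-fri\n10:00am-2:00pm, sat"
}

# Line 1: find_days => ["mon-fri"], so all_weekdays? expands it to Monday..Friday,
#         each with opens_at "9:00am" and closes_at "5:00pm" (via hours/clean_time).
# Line 2: find_days => ["sat"], single_days => ["Saturday"], opens_at "10:00am",
#         closes_at "2:00pm". One input row becomes seven per-day rows.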
data/lib/hsds_transformer/file_paths.rb
ADDED
@@ -0,0 +1,40 @@
module HsdsTransformer
  module FilePaths
    DEFAULT_OUTPUT_PATH = "#{ENV["ROOT_PATH"]}/tmp"
    DEFAULT_INPUT_PATH = "#{ENV["ROOT_PATH"]}/"

    attr_reader :input_path, :output_path, :output_datapackage_path, :output_data_path, :datapackage_json_path,
                :zipfile_name, :output_organizations_path, :output_locations_path, :output_services_path,
                :output_phones_path, :output_physical_addresses_path, :output_postal_addresses_path,
                :output_services_at_locations_path, :output_eligibilities_path, :output_contacts_path,
                :output_languages_path, :output_accessibility_for_disabilities_path, :output_taxonomies_path,
                :output_service_taxonomies_path, :output_regular_schedules_path, :output_service_areas_path

    # TODO DRY this up
    def set_file_paths(args)
      @input_path = args[:input_path] || DEFAULT_INPUT_PATH
      @output_path = args[:output_path] || DEFAULT_OUTPUT_PATH
      @output_datapackage_path = File.join(output_path, "datapackage")
      @output_data_path = File.join(output_datapackage_path, "data")
      @zipfile_name = File.join(output_path, "datapackage.zip")

      @output_organizations_path = output_data_path + "/organizations.csv"
      @output_locations_path = output_data_path + "/locations.csv"
      @output_services_path = output_data_path + "/services.csv"
      @output_phones_path = output_data_path + "/phones.csv"
      @output_physical_addresses_path = output_data_path + "/physical_addresses.csv"
      @output_postal_addresses_path = output_data_path + "/postal_addresses.csv"
      @output_services_at_locations_path = output_data_path + "/services_at_location.csv"
      @output_eligibilities_path = output_data_path + "/eligibility.csv"
      @output_contacts_path = output_data_path + "/contacts.csv"
      @output_languages_path = output_data_path + "/languages.csv"
      @output_accessibility_for_disabilities_path = output_data_path + "/accessibility_for_disabilities.csv"
      @output_taxonomies_path = output_data_path + "/taxonomy.csv"
      @output_service_taxonomies_path = output_data_path + "/services_taxonomy.csv"
      @output_regular_schedules_path = output_data_path + "/regular_schedules.csv"
      @output_service_areas_path = output_data_path + "/service_areas.csv"

      @datapackage_json_path = File.join(ENV["ROOT_PATH"], "lib/datapackage/datapackage.json")
    end
  end
end
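Assuming an output_path of "out" and a ROOT_PATH env var (loaded via dotenv), the resolved paths look like this; a sketch of the accessors, not gem output.

set_file_paths(output_path: "out")

output_datapackage_path    # => "out/datapackage"
output_data_path           # => "out/datapackage/data"
zipfile_name               # => "out/datapackage.zip"
output_organizations_path  # => "out/datapackage/data/organizations.csv"
datapackage_json_path      # => "#{ENV['ROOT_PATH']}/lib/datapackage/datapackage.json"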
data/lib/hsds_transformer/headers.rb
ADDED
@@ -0,0 +1,31 @@
module HsdsTransformer
  module Headers
    ORGANIZATIONS_HEADERS = %w(id name alternate_name description email url tax_status tax_id year_incorporated legal_status)
    LOCATIONS_HEADERS = %w(id organization_id name alternate_name description transportation latitude longitude)
    SERVICES_HEADERS = %w(id organization_id program_id name alternate_name description url email status interpretation_services application_process wait_time fees accreditations licenses)
    PHONES_HEADERS = %w(id location_id service_id organization_id contact_id service_at_location_id number extension type language description)
    PHYSICAL_ADDRESSES_HEADERS = %w(id location_id organization_id attention address_1 city region state_province postal_code country)
    POSTAL_ADDRESSES_HEADERS = %w(id location_id organization_id attention address_1 city region state_province postal_code country)
    REGULAR_SCHEDULES_HEADERS = %w(id service_id location_id service_at_location_id weekday opens_at closes_at)
    SERVICES_AT_LOCATIONS_HEADERS = %w(id service_id location_id description)
    ELIGIBILITIES_HEADERS = %w(id service_id eligibility)
    CONTACTS_HEADERS = %w(id organization_id service_id service_at_location_id name title department email)
    LANGUAGES_HEADERS = %w(id service_id location_id language)
    ACCESSIBILITY_FOR_DISABILITIES_HEADERS = %w(id location_id accessibility details)
    TAXONOMIES_HEADERS = %w(id name parent_id parent_name vocabulary)
    SERVICE_TAXONOMIES_HEADERS = %w(id service_id taxonomy_id taxonomy_detail)
    SERVICE_AREAS_HEADERS = %w(id service_id service_area description)

    def headers(row, model)
      const_name = "HsdsTransformer::Headers::" + model.upcase + "_HEADERS"
      # TODO make sure the model name is valid
      const = Object.const_get(const_name)

      if row && @include_custom
        (const + row.keys).uniq
      else
        const
      end
    end
  end
end
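A sketch of how headers() widens the canonical column list when include_custom is set; the "intake_notes" column is invented for illustration.

row = { "id" => "1", "name" => "Food Bank", "intake_notes" => "walk-ins ok" }

# With @include_custom falsy:
headers(row, "organizations")  # => ORGANIZATIONS_HEADERS
# With @include_custom truthy:
headers(row, "organizations")  # => ORGANIZATIONS_HEADERS + ["intake_notes"], deduped by uniq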
data/lib/hsds_transformer/runner.rb
ADDED
@@ -0,0 +1,32 @@
module HsdsTransformer
  class Runner

    VALID_CUSTOM_TRANSFORMERS = %w(Open211MiamiTransformer IlaoTransformer)

    # Args:
    # input_path - the dir containing the input data files
    # output_path - the dir the resulting HSDS files should go in
    # include_custom - Default: false - whether the final output CSVs should include the non-HSDS columns the original input CSVs had
    # zip_output - Default: false - whether the output should be zipped into a single datapackage.zip
    # custom_transformer - Default: nil - the custom transformer class to use. This arg is not passed on to the transformer classes
    def self.run(args)
      custom = args.delete(:custom_transformer)
      validate_custom(custom)

      transformer = custom ? custom_transformer(custom) : BaseTransformer

      transformer.run(args)
    end

    def self.validate_custom(custom)
      if custom && !VALID_CUSTOM_TRANSFORMERS.include?(custom)
        raise InvalidCustomTransformerException
      end
    end

    def self.custom_transformer(custom)
      klass = "HsdsTransformer::" + custom
      Object.const_get(klass)
    end
  end
end
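Runner is the typical entry point; a sketch with hypothetical paths. Note that custom_transformer is validated against VALID_CUSTOM_TRANSFORMERS and removed from args before dispatch.

HsdsTransformer::Runner.run(
  mapping: "mapping.yaml",
  input_path: "input/",
  output_path: "out",
  custom_transformer: "IlaoTransformer",  # or "Open211MiamiTransformer", or omit for BaseTransformer
  include_custom: true,
  zip_output: false
)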
data/lib/hsds_transformer.rb
ADDED
@@ -0,0 +1,15 @@
require "dotenv/load"
require "csv"
require "yaml"
require "zip"
require "zip/zip"
require "rest_client"

require "hsds_transformer/file_paths"
require "hsds_transformer/headers"
require "hsds_transformer/exceptions"
require "hsds_transformer/runner"
require "hsds_transformer/base_transformer"

require "hsds_transformer/custom/open211_miami_transformer"
require "hsds_transformer/custom/ilao_transformer"
data/lib/support.rb
ADDED
@@ -0,0 +1,31 @@
# TODO implement validation
module Support
  def validate(filename, type)
    file = File.new(filename.to_s, 'rb')
    RestClient.post('http://localhost:1400/validate/csv',
                    { "file" => file,
                      "type" => type })
    true
  rescue RestClient::BadRequest
    @valid = false
    false
  end

  def validate_output
    unless validate(output_organizations_path, "organization")
      puts "Organization data not valid"
    end
    unless validate(output_locations_path, "location")
      puts "Location data not valid"
    end
    unless validate(output_services_path, "service")
      puts "Service data not valid"
    end
    unless validate(output_phones_path, "phone")
      puts "Phone data not valid"
    end
  rescue Errno::ECONNREFUSED
    puts "Can't connect to validation service."
  end
end
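Support assumes a CSV validation service listening on localhost:1400, which ships separately. A sketch (the subclass name is invented) of wiring it into a transformer, since validate_output reads the output_*_path accessors that FilePaths provides:

class ValidatingTransformer < HsdsTransformer::BaseTransformer
  include Support

  def transform
    super
    validate_output  # prints a warning per invalid file, or a connection error
  end
end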