heydan 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +44 -0
- data/LICENSE.txt +11 -0
- data/README.md +84 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/heydan +11 -0
- data/bin/setup +7 -0
- data/heydan.gemspec +49 -0
- data/lib/heydan.rb +65 -0
- data/lib/heydan/base.rb +72 -0
- data/lib/heydan/cdn.rb +30 -0
- data/lib/heydan/cli.rb +80 -0
- data/lib/heydan/help_text.rb +83 -0
- data/lib/heydan/helper.rb +108 -0
- data/lib/heydan/import.rb +49 -0
- data/lib/heydan/jurisdiction_file.rb +119 -0
- data/lib/heydan/open_civic_identifiers.rb +51 -0
- data/lib/heydan/script.rb +179 -0
- data/lib/heydan/script_file.rb +44 -0
- data/lib/heydan/server.rb +55 -0
- data/lib/heydan/source_file.rb +79 -0
- data/lib/heydan/sources.rb +128 -0
- data/lib/heydan/version.rb +3 -0
- data/lib/templates/script.rb.erb +32 -0
- metadata +285 -0
data/lib/heydan/cli.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'thor'
|
2
|
+
|
3
|
+
module HeyDan
|
4
|
+
|
5
|
+
# Thor command group for managing source folders. It is mounted as the
# `sources` subcommand by Cli.
#
# Every command first prints its help text through HeyDan.helper_text and
# then delegates to the HeyDan::Sources class method of the same purpose.
class Source < Thor
  desc "sync", "sync your sources folders from the settings file"
  def sync
    HeyDan.helper_text('sources_sync')
    HeyDan::Sources.sync
  end

  desc "add GITHUB_LINK", "Add a new folder of sources from github"
  def add(github_link)
    HeyDan.helper_text('sources_add')
    HeyDan::Sources.add(github_link)
  end

  desc "update NAME", "update a folder of sources"
  def update(name)
    HeyDan.helper_text('sources_update')
    HeyDan::Sources.update(name)
  end

  # The usage banner now says NAME so that it matches both the parameter and
  # the long description (it previously said SOURCE).
  desc "new FOLDER NAME VARIABLE", "adds a new source NAME in the FOLDER, and an optional VARIABLE"
  def new(folder, name, variable=nil)
    HeyDan.helper_text('sources_new')
    HeyDan::Sources.create(folder, name, variable)
  end

  # The option is declared as :fromsource, so the flag Thor accepts is
  # --fromsource; the description used to advertise --from-source.
  option :fromsource, type: :boolean
  desc "build FOLDER NAME VARIABLE", "builds a source's variables in FOLDER, or optional VARIABLE. You can pass --type school_district for a specific jurisdiction type, or --fromsource to build original files"
  def build(folder=nil, name=nil, variable=nil)
    HeyDan.helper_text('sources_build')
    # Expose the parsed command-line options to the rest of the library.
    HeyDan.options = options
    HeyDan::Sources.build(folder, name, variable)
  end
end
|
39
|
+
|
40
|
+
# Root Thor command-line interface for heydan.
#
# Declares a `type` class option for every command and registers the Source
# command group as the `sources` subcommand.
class Cli < Thor
  class_option 'type'

  desc "setup DIR", "Setups HeyDan in the current directory or specified DIR"
  def setup(dir=nil)
    HeyDan.helper_text('setup')
    HeyDan::Base.setup(dir)
  end

  desc "build", "Builds jurisdiction files"
  def build
    HeyDan.helper_text('build')
    # Expose the parsed command-line options to the rest of the library.
    HeyDan.options = options
    HeyDan::OpenCivicIdentifiers.build
  end

  desc 'import', "Imports files into elasticsearch"
  def import
    HeyDan.helper_text('import')
    HeyDan::Import.process
  end

  desc "sources SUBCOMMAND ...ARGS", "manage sources"
  subcommand "sources", Source

  desc "server", "starts up the webserver for heydan"
  def server
    puts "Serving up some HeyDan Realness"
    HeyDan::Server.run!
  end

  # The description used to be a copy of the `server` one.
  # NOTE(review): wording inferred from the HeyDan::Cdn.upload call - confirm
  # against HeyDan::Cdn. The banner printed below also looks copied from
  # `server`; it is left unchanged here.
  desc "upload", "uploads files to the cdn"
  def upload
    puts "Serving up some HeyDan Realness"
    HeyDan::Cdn.upload
  end
end
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module HeyDan
  # Console help messages, one class method per command or step.
  #
  # Nothing is printed unless HeyDan.help? is truthy. All methods return nil.
  class HelpText
    class << self
      # Help shown by `heydan setup`. +opts+ is accepted but not used.
      def setup(opts={})
        say do
          %Q(
            Hi! Adding a jurisdictions, datasets, downloads and sources directory and a settings.yml file. If you want to move these directories to other places, just update their locations in the settings file.

            If you want to run heydan from a different folder than the settings.yml, create an environment variable:

            export HEYDAN_SETTINGS=full/path/to/settings.yml

            To turn off this help, run 'heydan help off' or set the help in settings to false.

            heydan grabs datasets and information about jurisdictions. If you want to focus on just one type of jurisdiction, update the settings 'jurisdiction_type'. Or you can pass --type school_district to any heydan command.

            Next, run `heydan build` to set up your files.
          )
        end
      end

      # Help shown by `heydan build`; opts[:type] names the jurisdiction type
      # being built and defaults to 'all'.
      def build(opts={})
        type = opts[:type] || 'all'
        say do
          %Q(
            Woot, building files for type #{type} jurisdictions/. You will see a progress bar below. If you didn't specify a type, it might take a while.

            heydan uses the Open Civic Identifiers format to structure file names and main identification for jurisdictions. This helps create a unique nonchanging identification code for every jurisdiction, based on the sponsoring parent. So, the State of Kansas, would be country:us/state:kansas. heydan creates a flat json file for each jurisdiction, which you can then import into your own application or elasticsearch.

            Next, run heydan sources sync
          )
        end
      end

      def sources_add
        say do
          %Q(
            You can leverage the community of developers out there. Add the github link to a source repo and tap into all that hard work.

            When you add a new one, it will get stored in your settings file under 'sources'
          )
        end
      end

      def sources_sync
        say do
          %Q(
            Sync all the sources in your settings file.
          )
        end
      end

      def sources_update
        say do
          %Q(
            Update a single source.
          )
        end
      end

      # No help text yet; kept so the name can still be called.
      def sources_build
      end

      # No help text yet; kept so the name can still be called.
      def sources_new
      end

      def git_clone(name)
        say { %Q(Cloning #{name} into #{HeyDan.folders[:sources]}) }
      end

      def git_update(name)
        say { %Q(Updating #{name} in #{HeyDan.folders[:sources]}) }
      end

      # The message used to be wrapped in literal double quotes inside %Q(),
      # so the quotes themselves were printed.
      def build_identifier(identifier)
        say { "building identifiers hash for #{identifier} to filenames, this might take a moment" }
      end

      def import
        say { "This will import all the files/contents from the jurisdictions folder into ElasticSearch. Make sure it's running!" }
      end

      private

      # Prints the string returned by the block, with the source indentation
      # stripped from every line, when help is enabled. The block is only
      # evaluated in that case, so messages that read HeyDan.folders do not
      # touch it while help is switched off.
      def say
        return unless HeyDan.help?
        puts yield.gsub(/^[ \t]+/, '')
      end
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'digest'
|
2
|
+
require 'uri'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
module HeyDan
  # File, download and CSV utilities shared by the heydan commands.
  #
  # All methods are class-level. Paths are resolved against the :downloads
  # and :datasets entries of HeyDan.folders.
  class Helper
    class << self
      # Converts a snake_case name to CamelCase
      # ("school_district" => "SchoolDistrict").
      def classify(name)
        name.split('_').map(&:capitalize).join
      end

      # Fetches +url+ into the downloads folder, using the MD5 of the url as
      # the file name. An already downloaded file is reused.
      # Returns the path of the local copy.
      def download(url)
        new_file = File.join(HeyDan.folders[:downloads], md5_name(url))
        return new_file if File.exist?(new_file)
        download_file(url, new_file)
        new_file
      end

      # Writes +data+ (an array of rows) to <datasets>/<name>.csv.
      def save_data(name, data)
        CSV.open(File.expand_path(dataset_path(name)), 'w') do |csv|
          data.each { |row| csv << row }
        end
      end

      # Reads <datasets>/<name>.csv and returns its rows.
      def get_data(name)
        CSV.read(dataset_path(name))
      end

      def dataset_exists?(name)
        File.exist?(dataset_path(name))
      end

      # Downloads +url+ and returns its parsed rows.
      #
      # * .csv files are parsed directly.
      # * .zip files are extracted; a single entry yields its rows (or nil
      #   when it does not look delimited), several entries yield one element
      #   per entry (nil for entries that do not look delimited).
      # * anything else is parsed only when it looks delimited, otherwise the
      #   result is nil.
      def get_data_from_url(url)
        ext = get_file_type_from_url(url)
        file = download(url)
        case ext
        when 'csv'
          get_csv_data(file)
        when 'zip'
          files = unzip(file)
          if files.size == 1
            get_csv_data(files[0]) if is_csv?(files[0])
          else
            files.map { |f| get_csv_data(f) if is_csv?(f) }
          end
        else
          get_csv_data(file) if is_csv?(file)
        end
      end

      # Heuristic: true when the first line of the file contains a tab or a
      # comma. Empty files and non-files are never CSV.
      #
      # The previous test (`match(tab).nil? || match(comma).nil?`) was
      # inverted: it answered true for almost every file and raised EOFError
      # on an empty one.
      def is_csv?(file_path)
        return false unless File.file?(file_path)
        first_line = File.open(file_path, &:gets)
        return false if first_line.nil?
        first_line.include?("\t") || first_line.include?(',')
      end

      # Parses a delimited text file into rows. Invalid bytes and all double
      # quotes are dropped first; the separator is a tab when the content
      # contains one, a comma otherwise.
      def get_csv_data(file)
        contents = File.read(file, :encoding => 'utf-8')
                       .encode("UTF-8", :invalid => :replace, :replace => "")
                       .delete('"')
        separator = contents.include?("\t") ? "\t" : ","
        # Options are passed as keywords; a positional hash is rejected by
        # CSV.parse on Ruby 3.
        CSV.parse(contents, :col_sep => separator)
      end

      def md5_name(text)
        Digest::MD5.hexdigest(text)
      end

      # Copies the content behind +url+ to +file_path+ and returns the
      # expanded destination path.
      def download_file(url, file_path)
        full_path = File.expand_path(file_path)
        source = open_source(url)
        begin
          File.open(full_path, 'wb') { |saved_file| IO.copy_stream(source, saved_file) }
        ensure
          source.close
        end
        full_path
      end

      # Extension of the url's path without the dot ("csv", "zip", ...);
      # the query string is ignored.
      def get_file_type_from_url(url)
        File.extname(URI.parse(url).path).delete('.')
      end

      # Extracts every entry of the zip archive +file+ into the downloads
      # folder (entries already on disk are kept) and returns their paths.
      # Entries whose name would resolve outside the downloads folder are
      # skipped.
      def unzip(file)
        require 'zip' # loaded lazily: only needed for zip downloads
        root = File.expand_path(HeyDan.folders[:downloads])
        files = []
        Zip::File.open(file) do |zip_file|
          zip_file.each do |entry|
            download_path = File.expand_path(File.join(root, entry.name))
            next unless download_path.start_with?(root + File::SEPARATOR)
            entry.extract(download_path) unless File.exist?(download_path)
            files << download_path
          end
        end
        files
      end

      private

      # Location of the dataset called +name+; a ".csv" suffix is optional.
      def dataset_path(name)
        File.join(HeyDan.folders[:datasets], "#{name.gsub('.csv', '')}.csv")
      end

      # Opens +url+ for reading. URI.open is used where open-uri provides it
      # (Kernel#open no longer accepts URLs on Ruby 3); older rubies fall
      # back to the open-uri patched Kernel#open.
      def open_source(url)
        URI.respond_to?(:open) ? URI.open(url) : open(url)
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'ruby-progressbar'
|
2
|
+
require 'elasticsearch'
|
3
|
+
|
4
|
+
module HeyDan
  # Bulk-imports the jurisdiction files into an Elasticsearch index.
  class Import
    attr_accessor :client

    # Number of files sent to Elasticsearch per bulk request.
    BATCH_SIZE = 10_000

    class << self
      # Memoised Elasticsearch client for the url in HeyDan.elasticsearch.
      def client
        @client ||= Elasticsearch::Client.new host: HeyDan.elasticsearch[:url], log: false
      end

      # Name of the index the documents are written to.
      def index
        @index ||= 'jurisdictions'
      end

      def check_index
        client.indices.exists? index: index
      end

      def create_index
        client.indices.create index: index
      end

      # Creates the index when it is missing, then sends every file found in
      # the jurisdictions folder to Elasticsearch in bulk requests of at most
      # +batch_size+ documents, updating a progress bar after each request.
      #
      # Changes from the previous hand-rolled a/b index loop:
      # * batches have a uniform size,
      # * no bulk request is sent when the folder is empty,
      # * the folder is listed once instead of twice,
      # * the index name comes from +index+ instead of a second literal.
      def process(batch_size = BATCH_SIZE)
        create_index unless check_index
        files = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
        progress = ProgressBar.create(:title => "Importing #{files.size} jurisdictions into Elastic Search", :starting_at => 0, :total => files.size)
        done = 0
        files.each_slice(batch_size) do |batch|
          bulk = batch.map do |file|
            jf = HeyDan::JurisdictionFile.new(name: file)
            { index: { _index: index, _type: jf.type, _id: jf.hash_id, data: jf.get_json } }
          end
          client.bulk refresh: true, body: bulk
          done += batch.size
          # The final step is left to #finish, as before.
          progress.progress = done if done < files.size
        end
        progress.finish
      end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
require 'json'

module HeyDan
  # One jurisdiction, stored as a flat JSON file in the jurisdictions folder.
  #
  # +name+ is the identifier path (for example "country:us/state:ks"). On disk
  # the slashes become "::" and ".json" is appended. The JSON document is
  # loaded on first use and cached in +json+; nothing is written until #save.
  class JurisdictionFile
    # Top-level keys that #add_property refuses to overwrite.
    RESERVED_KEYS = ['datasets', 'identifiers', 'id', 'entityType', 'attributes'].freeze

    attr_accessor :name
    attr_accessor :json

    # opts[:name] may be an identifier or a path to a ".json" jurisdiction
    # file. "jurisdictions/", "ocd-division/" and "../" fragments are removed.
    #
    # Raises RuntimeError ("Name is required") when no name is given. The
    # check now runs first; it used to come after the first use of the name
    # and could never be reached.
    def initialize(opts={})
      @name = opts[:name]
      raise "Name is required" if @name.nil?
      convert_file_name if @name.include?('.json')
      @name = @name.gsub('jurisdictions/','').gsub('ocd-division/','').gsub(/\.\.\//,'')
    end

    # Entity type: the part before ":" in the last path segment
    # ("country:us/state:ks" => "state").
    def type
      @name.split('/')[-1].split(':')[0]
    end

    def id
      get_json['id']
    end

    # True when +ocd_type+ is nil or the document id matches it as a regular
    # expression, with ":all" acting as a wildcard.
    def match_type?(ocd_type)
      return true if ocd_type.nil?
      !id.match(/#{ocd_type.gsub(':all', '.+')}/).nil?
    end

    # Turns a file name/path ("dir/country:us::state:ks.json") back into an
    # identifier ("country:us/state:ks").
    def convert_file_name
      @name = @name.split('/')[-1].gsub('::','/').gsub('.json','')
    end

    def file_name
      "#{@name.gsub(/\//, '::').gsub('ocd-division::', '')}.json"
    end

    def hash_id
      HeyDan::Helper.md5_name(@name.gsub(/\.\.\//,''))
    end

    def exists?
      File.exist?(file_path)
    end

    def file_path
      File.expand_path(File.join(HeyDan.folders[:jurisdictions], file_name))
    end

    # Document used when no file (or an empty file) exists yet.
    def initial_json
      {'id' => @name, 'entityType' => type, 'attributes'=> {}, 'identifiers' => {}, 'datasets' => []}
    end

    def get_identifier(key)
      get_json['identifiers'][key]
    end

    def add_identifier(key, value)
      get_json['identifiers'][key] = value
      @json
    end

    def add_dataset(value)
      get_json['datasets'] << value
      @json
    end

    # First dataset whose 'id' equals +key+, or nil.
    def get_dataset(key)
      get_json['datasets'].find { |d| d['id'] == key }
    end

    def datasets
      get_json['datasets']
    end

    # Sets a top-level property. Returns false, without changing anything,
    # for the reserved structural keys.
    def add_property(key, value)
      return false if RESERVED_KEYS.include?(key)
      get_json[key] = value
      @json
    end

    def add_attribute(key, value)
      get_json['attributes'][key] = value
      @json
    end

    def get_attribute(key)
      get_json['attributes'][key]
    end

    # Returns the cached document, loading it on first use: the parsed file
    # when it exists and is not empty, #initial_json otherwise. The file is no
    # longer re-read once the document is cached.
    def get_json
      return @json if @json
      contents = exists? ? File.read(file_path) : ""
      @json = contents == "" ? initial_json : JSON.parse(contents)
    end

    # Writes the document to #file_path. The document is loaded first, so
    # saving an untouched instance no longer writes "null".
    def save
      File.open(file_path, 'w') do |f|
        f.write(get_json.to_json)
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'ruby-progressbar'
|
2
|
+
|
3
|
+
# Builds the jurisdiction files from the Open Civic Data division list.
#
# The list is fetched once, reduced to its first two columns and stored as a
# dataset through HeyDan::Helper; one HeyDan::JurisdictionFile is then written
# per row.
class HeyDan::OpenCivicIdentifiers
  include HeyDan

  class << self
    attr_accessor :jurisdiction_type, :jurisdictions_folder, :data

    # Name under which the dataset is stored.
    def name
      'open_civic_data'
    end

    # Entry point: reads the requested type from HeyDan.options, prepares
    # settings and folders, then downloads the list (when needed) and writes
    # the files. +opts+ is accepted but not used.
    def build(opts={})
      @jurisdiction_type = HeyDan.options[:type]
      HeyDan::Base.load_or_create_settings
      HeyDan::Base.create_folders
      @jurisdictions_folder = HeyDan.folders[:jurisdictions]
      download
      build_jurisdiction_files
    end

    # Fetches the division list unless the dataset is already stored. The
    # header row is replaced by ['id', 'name'] and only the first two columns
    # of every other row are kept.
    def download
      return if HeyDan::Helper.dataset_exists?(name)

      @data = HeyDan::Helper.get_data_from_url('https://github.com/opencivicdata/ocd-division-ids/blob/master/identifiers/country-us.csv?raw=true')
      @data = [['id', 'name']] + @data[1..-1].map { |columns| [columns[0], columns[1]] }
      HeyDan::Helper.save_data(name, @data)
    end

    # Writes one jurisdiction file per data row (the header is skipped),
    # limited to rows matching the requested type. A progress bar is shown
    # only when HeyDan.help? is enabled.
    def build_jurisdiction_files
      @data = HeyDan::Helper.get_data(name) if @data.nil?
      @progress = ProgressBar.create(:title => "Building Files in #{HeyDan.folders[:jurisdictions]} for jurisdictions #{('matching ' + @jurisdiction_type) if @jurisdiction_type}", :starting_at => 0, :total => @data[1..-1].size) if HeyDan.help?
      @data[1..-1].each_with_index do |row, position|
        jurisdiction = HeyDan::JurisdictionFile.new(name: row[0])
        next unless jurisdiction.match_type?(@jurisdiction_type)

        jurisdiction.add_identifier('open_civic_id', row[0].gsub('ocd-division/',''))
        jurisdiction.add_property('name', row[1])
        jurisdiction.save
        @progress.progress = position if HeyDan.help?
      end
      @progress.finish if HeyDan.help?
    end
  end
end
|