heydan 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Guardfile +44 -0
- data/LICENSE.txt +11 -0
- data/README.md +84 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/heydan +11 -0
- data/bin/setup +7 -0
- data/heydan.gemspec +49 -0
- data/lib/heydan.rb +65 -0
- data/lib/heydan/base.rb +72 -0
- data/lib/heydan/cdn.rb +30 -0
- data/lib/heydan/cli.rb +80 -0
- data/lib/heydan/help_text.rb +83 -0
- data/lib/heydan/helper.rb +108 -0
- data/lib/heydan/import.rb +49 -0
- data/lib/heydan/jurisdiction_file.rb +119 -0
- data/lib/heydan/open_civic_identifiers.rb +51 -0
- data/lib/heydan/script.rb +179 -0
- data/lib/heydan/script_file.rb +44 -0
- data/lib/heydan/server.rb +55 -0
- data/lib/heydan/source_file.rb +79 -0
- data/lib/heydan/sources.rb +128 -0
- data/lib/heydan/version.rb +3 -0
- data/lib/templates/script.rb.erb +32 -0
- metadata +285 -0
data/lib/heydan/cli.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'thor'
|
2
|
+
|
3
|
+
module HeyDan
|
4
|
+
|
5
|
+
# Thor command group mounted as `heydan sources ...`.
# Each command prints its help text through HeyDan::helper_text and then
# delegates to the matching HeyDan::Sources class method.
class Source < Thor

  desc "sync", "sync your sources folders from the settings file"
  def sync
    HeyDan::helper_text('sources_sync')
    HeyDan::Sources.sync
  end

  desc "add GITHUB_LINK", "Add a new folder of sources from github"
  def add(github_link)
    HeyDan::helper_text('sources_add')
    HeyDan::Sources.add(github_link)
  end

  desc "update NAME", "update a folder of sources"
  def update(name)
    HeyDan::helper_text('sources_update')
    HeyDan::Sources.update(name)
  end

  # The usage banner names the second argument NAME so that it agrees with
  # both the long description and the method signature.
  desc "new FOLDER NAME VARIABLE", "adds a new source NAME in the FOLDER, and an optional VARIABLE"
  def new(folder, name, variable=nil)
    HeyDan::helper_text('sources_new')
    HeyDan::Sources.create(folder, name, variable)
  end

  # The option is declared as :fromsource, so the switch Thor generates is
  # --fromsource; the help text below advertises that exact spelling.
  option :fromsource, type: :boolean
  desc "build FOLDER NAME VARIABLE", "builds a source's variables in FOLDER, or optional VARIABLE. You can pass --type school_district for a specific jurisdiction type, or --fromsource to build original files"
  def build(folder=nil, name=nil, variable=nil)
    HeyDan::helper_text('sources_build')
    # Expose the parsed command line options to the rest of the library.
    HeyDan.options = options
    HeyDan::Sources.build(folder, name, variable)
  end
end
|
39
|
+
|
40
|
+
# Top-level Thor command line interface for heydan.
# Commands delegate to the library classes; `sources` is mounted as a
# subcommand group implemented by Source.
class Cli < Thor
  class_option 'type'

  desc "setup DIR", "Setups HeyDan in the current directory or specified DIR"
  def setup(dir=nil)
    HeyDan::helper_text('setup')
    HeyDan::Base.setup(dir)
  end

  desc "build", "Builds jurisdiction files"
  def build
    HeyDan::helper_text('build')
    # Expose the parsed command line options (e.g. --type) to the library.
    HeyDan.options = options
    HeyDan::OpenCivicIdentifiers.build
  end

  desc 'import', "Imports files into elasticsearch"
  def import
    HeyDan::helper_text('import')
    HeyDan::Import.process
  end

  desc "sources SUBCOMMAND ...ARGS", "manage sources"
  subcommand "sources", Source

  desc "server", "starts up the webserver for heydan"
  def server
    puts "Serving up some HeyDan Realness"
    HeyDan::Server.run!
  end

  # The description used to be a copy of the `server` one; it now describes
  # the call this command actually makes (HeyDan::Cdn.upload).
  desc "upload", "uploads heydan files to the cdn"
  def upload
    puts "Serving up some HeyDan Realness"
    HeyDan::Cdn.upload
  end

end
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# Human-readable guidance printed by the command line tool.
# Every method that prints does so only while HeyDan.help? is truthy;
# all of them return nil.
class HeyDan::HelpText
  class << self
    # Explains what `setup` creates and how to point heydan at a settings
    # file that lives in another folder. +opts+ is accepted but unused.
    def setup(opts={})
      return unless HeyDan.help?
      puts %Q(
        Hi! Adding a jurisdictions, datasets, downloads and sources directory and a settings.yml file. If you want to move these directories to other places, just update their locations in the settings file.

        If you want to run heydan from a different folder than the settings.yml, create an environment variable:

        export HEYDAN_SETTINGS=full/path/to/settings.yml

        To turn off this help, run 'heydan help off' or set the help in settings to false.

        heydan grabs datasets and information about jurisdictions. If you want to focus on just one type of jurisdiction, update the settings 'jurisdiction_type'. Or you can pass --type school_district to any heydan command.

        Next, run `heydan build` to setup your files.
      )
    end

    # Explains the jurisdiction build step. opts[:type] names the
    # jurisdiction type being built and falls back to 'all'.
    def build(opts={})
      return unless HeyDan.help?
      type = opts[:type] || 'all'
      puts %Q(
        Woot, building files for type #{type} jurisdictions/. You will see a progress bar below. If you didn't specify a type, it might take a while.

        heydan uses the Open Civic Identifiers format to structure file names and main identification for jurisdictions. This helps create a unique nonchanging identification code for every jurisdiction, based on the sponsoring parent. So, the State of Kansas, would be country:us/state:kansas. heydan creates a flat json file for each jurisdiction, which you can then import into your own application or elasticsearch.

        Next, run heydan sources sync
      )
    end

    # Help for `sources add`.
    def sources_add
      return unless HeyDan.help?
      puts %Q(
        You can leverage the community of developers out there. Add the github link to a source repo and tap into all that hardwork.

        When you add a new one, it will get stored in your settings file under 'sources'
      )
    end

    # Help for `sources sync`.
    def sources_sync
      return unless HeyDan.help?
      puts %Q(
        Sync all the sources in your settings file.
      )
    end

    # Help for `sources update`.
    def sources_update
      return unless HeyDan.help?
      puts %Q(
        Update a single source.
      )
    end

    # No help text is defined for `sources build`; intentionally empty.
    def sources_build
    end

    # No help text is defined for `sources new`; intentionally empty.
    def sources_new
    end

    # Announces that the source folder +name+ is being cloned.
    def git_clone(name)
      return unless HeyDan.help?
      puts %Q(Cloning #{name} into #{HeyDan.folders[:sources]})
    end

    # Announces that the source folder +name+ is being updated.
    def git_update(name)
      return unless HeyDan.help?
      puts %Q(Updating #{name} in #{HeyDan.folders[:sources]})
    end

    # Announces that a lookup for +identifier+ is being built.
    def build_identifier(identifier)
      return unless HeyDan.help?
      puts %Q("building identifiers hash for #{identifier} to filenames, this might take a moment")
    end

    # Help for `import`.
    def import
      return unless HeyDan.help?
      puts "This will import all the files/contents from the jurisdictions folder into ElasticSearch. Make sure it's running!"
    end

  end

end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'digest'
|
2
|
+
require 'uri'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
class HeyDan::Helper
|
7
|
+
|
8
|
+
class << self
|
9
|
+
|
10
|
+
# Turns a snake_case name into CamelCase by capitalizing every
# underscore-separated word and joining the words back together.
def classify(name)
  words = name.split('_')
  words.map { |word| word.capitalize }.join
end
|
13
|
+
|
14
|
+
# Returns the local path of the cached copy of +url+ inside the downloads
# folder, fetching it with download_file only when no file with the
# md5-derived name exists yet.
def download(url)
  destination = File.join(HeyDan.folders[:downloads], md5_name(url))
  download_file(url, destination) unless File.exist?(destination)
  destination
end
|
21
|
+
|
22
|
+
# Writes +data+ (an array of rows) to "<name>.csv" in the datasets folder.
# Any '.csv' already present in +name+ is removed before the extension is
# appended. Returns +data+.
def save_data(name, data)
  folder = HeyDan.folders[:datasets]
  base_name = name.gsub('.csv', '')
  target = File.expand_path(File.join(folder, "#{base_name}.csv"))
  CSV.open(target, 'w') do |csv|
    data.each { |row| csv << row }
  end
end
|
30
|
+
|
31
|
+
# Reads "<name>.csv" from the datasets folder and returns its rows as an
# array of arrays. A '.csv' already present in +name+ is not doubled.
def get_data(name)
  csv_name = "#{name.gsub('.csv', '')}.csv"
  csv_path = File.join(HeyDan.folders[:datasets], csv_name)
  CSV.read(csv_path)
end
|
34
|
+
|
35
|
+
# True when "<name>.csv" is present in the datasets folder.
# A '.csv' already present in +name+ is not doubled.
def dataset_exists?(name)
  csv_name = "#{name.gsub('.csv', '')}.csv"
  csv_path = File.join(HeyDan.folders[:datasets], csv_name)
  File.exist?(csv_path)
end
|
38
|
+
|
39
|
+
# Downloads +url+ and parses it according to the extension in the URL path.
#
# * 'csv'  - parsed directly.
# * 'zip'  - unpacked; a single entry yields its parsed rows (or nil when
#            it does not look like CSV), several entries yield an array
#            with one element per entry (nil for non-CSV entries).
# * others - parsed only when is_csv? accepts the downloaded file.
#
# The result is also stored in @data and returned.
def get_data_from_url(url)
  extension = get_file_type_from_url(url)
  local_file = download(url)
  @data =
    if extension == 'csv'
      get_csv_data(local_file)
    elsif extension == 'zip'
      extracted = unzip(local_file)
      if extracted.size == 1
        only_entry = extracted[0]
        is_csv?(only_entry) ? get_csv_data(only_entry) : nil
      else
        extracted.map { |path| is_csv?(path) ? get_csv_data(path) : nil }
      end
    else
      is_csv?(local_file) ? get_csv_data(local_file) : nil
    end
end
|
59
|
+
|
60
|
+
# Heuristic check that +file_path+ holds delimited text: true when its
# first line contains a tab or a comma. Not perfect - only the first line
# is inspected.
#
# The previous condition (`tab.nil? || comma.nil?`) was inverted: it
# accepted lines without any delimiter and rejected lines containing both.
# An empty file now returns false instead of raising EOFError.
def is_csv?(file_path)
  first_line = File.open(file_path) { |f| f.gets }
  return false if first_line.nil?
  first_line.include?("\t") || first_line.include?(',')
end
|
64
|
+
|
65
|
+
# Reads +file+ and parses it into an array of rows.
# All double quotes are stripped before parsing, and the content is treated
# as tab-separated when it contains any tab, otherwise as comma-separated.
def get_csv_data(file)
  contents = File.read(file, :encoding => 'utf-8').encode("UTF-8", :invalid=>:replace, :replace=>"").gsub('"',"")

  if contents.include?("\t")
    # Options are passed as keywords: a positional options hash is rejected
    # with ArgumentError by CSV.parse on Ruby 3.
    CSV.parse(contents, :col_sep => "\t")
  else
    CSV.parse(contents)
  end
end
|
75
|
+
|
76
|
+
# Returns the hexadecimal MD5 digest of +text+.
def md5_name(text)
  digest = Digest::MD5.new
  digest.update(text)
  digest.hexdigest
end
|
79
|
+
|
80
|
+
# Copies the content found at +url+ into +file_path+ and returns the
# expanded destination path.
#
# http(s) and ftp URLs are fetched through open-uri's URI#open; any other
# string is treated as a local file path. Kernel#open is deliberately not
# used: it no longer opens URLs on Ruby 3 and it would execute strings
# starting with "|" as shell commands. The source handle is always closed.
def download_file(url,file_path)
  full_path = File.expand_path(file_path)
  source = if url =~ /\A(?:https?|ftp):\/\//i
    URI.parse(url).open
  else
    File.open(url, 'rb')
  end
  begin
    File.open(full_path, 'wb') do |saved_file|
      saved_file.write(source.read)
    end
  ensure
    source.close
  end
  full_path
end
|
88
|
+
|
89
|
+
# Returns the extension of the path component of +url+ without the dot
# (query string ignored); an empty string when the path has no extension.
def get_file_type_from_url(url)
  url_path = URI.parse(url).path
  File.extname(url_path).delete('.')
end
|
92
|
+
|
93
|
+
# Extracts every entry of the zip archive +file+ into the downloads folder
# and returns the list of extracted paths. Entries whose target already
# exists are not extracted again but are still listed.
def unzip(file)
  path = HeyDan.folders[:downloads]
  require 'zip' # loaded lazily, only when an archive is actually handled
  root = File.expand_path(path)
  files = []
  Zip::File.open(file) do |zip_file|
    zip_file.each do |entry|
      download_path = File.expand_path(File.join(path, entry.name))
      # Archives come from remote sources: skip entry names such as
      # "../../x" that would resolve outside the downloads folder.
      next unless download_path.start_with?(root + File::SEPARATOR)
      # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
      entry.extract(download_path) unless File.exist?(download_path)
      files << download_path
    end
  end
  files
end
|
106
|
+
|
107
|
+
end
|
108
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'ruby-progressbar'
|
2
|
+
require 'elasticsearch'
|
3
|
+
|
4
|
+
# Imports the jurisdiction files into Elasticsearch using bulk requests.
class HeyDan::Import
  attr_accessor :client

  # Maximum number of documents sent in one bulk request.
  BATCH_SIZE = 10_000

  class << self
    # Memoized Elasticsearch client pointed at HeyDan.elasticsearch[:url].
    def client
      @client ||= Elasticsearch::Client.new host: HeyDan.elasticsearch[:url], log: false
    end

    # Name of the index the documents are written to.
    def index
      @index ||= 'jurisdictions'
    end

    # True when the index already exists.
    def check_index
      client.indices.exists? index: index
    end

    # Creates the index.
    def create_index
      client.indices.create index: index
    end

    # Creates the index when missing, then indexes every entry of the
    # jurisdictions folder in batches of BATCH_SIZE while updating a
    # progress bar. Returns nil.
    #
    # The memoized `client` is used instead of the bare @client, so this
    # no longer depends on check_index having initialised it. An empty
    # folder sends no bulk request at all.
    def process
      create_index unless check_index
      files = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
      progress = ProgressBar.create(:title => "Importing #{files.size} jurisdictions into Elastic Search", :starting_at => 0, :total => files.size)
      imported = 0
      files.each_slice(BATCH_SIZE) do |batch|
        bulk = batch.map do |file|
          jf = HeyDan::JurisdictionFile.new(name: file)
          { index: { _index: index, _type: jf.type, _id: jf.hash_id, data: jf.get_json } }
        end
        client.bulk refresh: true, body: bulk
        imported += batch.size
        progress.progress = imported
      end
      progress.finish unless progress.finished?
      nil
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# One jurisdiction stored as a flat JSON file inside the jurisdictions
# folder. The JSON document has the keys 'id', 'entityType', 'attributes',
# 'identifiers' and 'datasets' (see initial_json); it is loaded lazily by
# get_json and written back by save.
class HeyDan::JurisdictionFile
  attr_accessor :name
  attr_accessor :json

  # opts[:name] is required and may be either an identifier
  # ("ocd-division/country:us/state:ks") or a file name/path ending in
  # ".json" ("country:us::state:ks.json"). The "jurisdictions/" and
  # "ocd-division/" prefixes and any "../" segments are stripped.
  #
  # Raises RuntimeError "Name is required" when no name is given. The check
  # runs before the name is first used, so a missing name no longer
  # surfaces as NoMethodError.
  def initialize(opts={})
    @name = opts[:name]
    raise "Name is required" if @name.nil?
    convert_file_name if @name.include?('.json')
    @name = @name.gsub('jurisdictions/','').gsub('ocd-division/','').gsub(/\.\.\//,'')
  end

  # Part before the colon in the last path segment of the name,
  # e.g. "state" for "country:us/state:ks".
  def type
    @name.split('/')[-1].split(':')[0]
  end

  # The 'id' stored in the JSON document.
  def id
    get_json
    @json['id']
  end

  # True when +ocd_type+ is nil or the id matches it as a regular
  # expression; a ":all" in +ocd_type+ is treated as the wildcard ".+".
  def match_type?(ocd_type)
    return true if ocd_type.nil?
    !id.match(/#{ocd_type.gsub(':all', '.+')}/).nil?
  end

  # Turns a file name/path into an identifier: keeps the last path segment,
  # replaces "::" with "/" and drops ".json".
  def convert_file_name
    @name = @name.split('/')[-1]
    @name = "#{@name.gsub('::','/').gsub('.json','')}"
  end

  # File name for this jurisdiction: "/" becomes "::" and ".json" is added.
  def file_name
    "#{@name.gsub(/\//, '::').gsub('ocd-division::', '')}.json"
  end

  # MD5 hex digest of the name (with any "../" removed).
  def hash_id
    HeyDan::Helper.md5_name(@name.gsub(/\.\.\//,''))
  end

  # True when the JSON file is present on disk.
  def exists?
    # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
    File.exist?(file_path)
  end

  # Absolute path of the JSON file inside the jurisdictions folder.
  def file_path
    File.expand_path(File.join(HeyDan.folders[:jurisdictions], file_name))
  end

  # Document used when no (or an empty) file exists yet.
  def initial_json
    {'id' => @name, 'entityType' => type, 'attributes'=> {}, 'identifiers' => {}, 'datasets' => []}
  end

  def get_identifier(key)
    get_json
    @json['identifiers'][key]
  end

  # Sets an identifier and returns the whole document.
  def add_identifier(key, value)
    get_json
    @json['identifiers'][key] = value
    @json
  end

  # Appends a dataset and returns the whole document.
  def add_dataset(value)
    get_json
    @json['datasets'] << value
    @json
  end

  # First dataset whose 'id' equals +key+, or nil.
  def get_dataset(key)
    get_json
    @json['datasets'].select { |d| d['id']==key}[0]
  end

  def datasets
    get_json
    @json['datasets']
  end

  # Sets a top-level property and returns the whole document. The reserved
  # keys of the document structure cannot be overwritten: false is returned
  # for them and nothing changes.
  def add_property(key, value)
    return false if ['datasets', 'identifiers', 'id', 'entityType', 'attributes'].include?(key)
    get_json
    @json[key] = value
    @json
  end

  # Sets an attribute and returns the whole document.
  def add_attribute(key, value)
    get_json
    @json['attributes'][key] = value
    @json
  end

  def get_attribute(key)
    get_json
    @json['attributes'][key]
  end

  # Returns the document, loading it on first use: parsed from the file
  # when it exists and is non-empty, initial_json otherwise. Once loaded,
  # the file is not read again.
  def get_json
    return @json if @json
    contents = exists? ? File.read(file_path) : ''
    @json = contents == '' ? initial_json : JSON.parse(contents)
  end

  # Writes the document to file_path. The document is loaded first, so
  # saving an untouched instance no longer writes the literal "null".
  def save
    File.open(file_path, 'w') do |f|
      f.write(get_json.to_json)
    end
  end

end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'ruby-progressbar'
|
2
|
+
|
3
|
+
# Builds the jurisdiction files from the Open Civic Data division
# identifiers list (country-us.csv).
class HeyDan::OpenCivicIdentifiers
  include HeyDan

  class << self
    attr_accessor :jurisdiction_type
    attr_accessor :jurisdictions_folder
    attr_accessor :data

    # Dataset name under which the identifiers are cached as CSV.
    def name
      'open_civic_data'
    end

    # Entry point: reads the requested jurisdiction type from
    # HeyDan.options, prepares settings and folders, makes sure the
    # identifiers dataset is available and writes the jurisdiction files.
    # +opts+ is accepted but unused.
    def build(opts={})
      @jurisdiction_type = HeyDan.options[:type]
      HeyDan::Base.load_or_create_settings
      HeyDan::Base.create_folders
      @jurisdictions_folder = HeyDan.folders[:jurisdictions]
      download
      build_jurisdiction_files
    end

    # Fetches the identifiers CSV unless the dataset is already stored,
    # keeps only the first two columns, replaces the header row with
    # id/name and saves the result as a dataset.
    def download
      return if HeyDan::Helper.dataset_exists?(name)
      @data = HeyDan::Helper.get_data_from_url('https://github.com/opencivicdata/ocd-division-ids/blob/master/identifiers/country-us.csv?raw=true')
      @data = @data[1..-1].map { |columns| [columns[0], columns[1]] }
      @data.unshift(['id', 'name'])
      HeyDan::Helper.save_data(name, @data)
    end

    # Creates or updates one jurisdiction file per data row (header row
    # excluded), skipping rows that do not match the requested type. A
    # progress bar is shown while HeyDan.help? is truthy.
    def build_jurisdiction_files
      @data = HeyDan::Helper.get_data(name) if @data.nil?
      rows = @data[1..-1]
      @progress = ProgressBar.create(:title => "Building Files in #{HeyDan.folders[:jurisdictions]} for jurisdictions #{('matching ' + @jurisdiction_type) if @jurisdiction_type}", :starting_at => 0, :total => rows.size) if HeyDan.help?
      rows.each_with_index do |row, position|
        jurisdiction = HeyDan::JurisdictionFile.new(name: row[0])
        next unless jurisdiction.match_type?(@jurisdiction_type)
        jurisdiction.add_identifier('open_civic_id', row[0].gsub('ocd-division/',''))
        jurisdiction.add_property('name', row[1])
        jurisdiction.save
        @progress.progress = position if HeyDan.help?
      end
      @progress.finish if HeyDan.help?
    end
  end

end
|