heydan 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/heydan/cli.rb ADDED
@@ -0,0 +1,80 @@
1
+ require 'thor'
2
+
3
# Thor-based command line interface for heydan.
module HeyDan
  # Commands mounted under `sources` by HeyDan::Cli. Each command prints its
  # help text through HeyDan.helper_text and then delegates to HeyDan::Sources.
  class Source < Thor
    desc "sync", "sync your sources folders from the settings file"
    def sync
      HeyDan.helper_text('sources_sync')
      HeyDan::Sources.sync
    end

    desc "add GITHUB_LINK", "Add a new folder of sources from github"
    def add(github_link)
      HeyDan.helper_text('sources_add')
      HeyDan::Sources.add(github_link)
    end

    desc "update NAME", "update a folder of sources"
    def update(name)
      HeyDan.helper_text('sources_update')
      HeyDan::Sources.update(name)
    end

    # Usage banner names the second argument NAME so it matches both the
    # parameter and the description text.
    desc "new FOLDER NAME VARIABLE", "adds a new source NAME in the FOLDER, and an optional VARIABLE"
    def new(folder, name, variable=nil)
      HeyDan.helper_text('sources_new')
      HeyDan::Sources.create(folder, name, variable)
    end

    # The option is declared as :fromsource, so the switch documented in the
    # description is --fromsource.
    option :fromsource, type: :boolean
    desc "build FOLDER NAME VARIABLE", "builds a source's variables in FOLDER, or optional VARIABLE. You can pass --type school_district for a specific jurisdiction type, or --fromsource to build original files"
    def build(folder=nil, name=nil, variable=nil)
      HeyDan.helper_text('sources_build')
      # Expose the parsed command line options to the rest of the library.
      HeyDan.options = options
      HeyDan::Sources.build(folder, name, variable)
    end
  end

  # Top level `heydan` commands.
  class Cli < Thor
    class_option 'type'

    desc "setup DIR", "Setups HeyDan in the current directory or specified DIR"
    def setup(dir=nil)
      HeyDan.helper_text('setup')
      HeyDan::Base.setup(dir)
    end

    desc "build", "Builds jurisdiction files"
    def build
      HeyDan.helper_text('build')
      HeyDan.options = options
      HeyDan::OpenCivicIdentifiers.build
    end

    desc 'import', "Imports files into elasticsearch"
    def import
      HeyDan.helper_text('import')
      HeyDan::Import.process
    end

    desc "sources SUBCOMMAND ...ARGS", "manage sources"
    subcommand "sources", Source

    desc "server", "starts up the webserver for heydan"
    def server
      puts "Serving up some HeyDan Realness"
      HeyDan::Server.run!
    end

    # Description previously duplicated the one for `server`.
    desc "upload", "runs the HeyDan::Cdn upload"
    def upload
      # NOTE(review): banner text is shared with `server`; confirm whether
      # upload should print its own message.
      puts "Serving up some HeyDan Realness"
      HeyDan::Cdn.upload
    end
  end
end
@@ -0,0 +1,83 @@
1
# User-facing help messages printed before heydan commands run.
#
# Every method that prints is a no-op unless HeyDan.help? is truthy.
# Declared in nested form (like the CLI file) so loading this file does not
# depend on HeyDan having been defined first.
module HeyDan
  class HelpText
    class << self
      # Printed by `heydan setup`. +opts+ is accepted but not used.
      def setup(opts={})
        return unless HeyDan.help?
        puts %Q(
      Hi! Adding a jurisdictions, datasets, downloads and sources directory and a settings.yml file. If you want to move these directories to other places, just update their locations in the settings file.

      If you want to run heydan from a different folder than the settings.yml, create an environment variable:

      export HEYDAN_SETTINGS=full/path/to/settings.yml

      To turn off this help, run 'heydan help off' or set the help in settings to false.

      heydan grabs datasets and information about jurisdictions. If you want to focus on just one type of jurisdiction, update the settings 'jurisdiction_type'. Or you can pass --type school_district to any heydan command.

      Next, run `heydan build` to setup your files.
      )
      end

      # Printed by `heydan build`. Reads opts[:type], falling back to 'all'.
      def build(opts={})
        return unless HeyDan.help?
        type = opts[:type] || 'all'
        puts %Q(
      Woot, building files for type #{type} jurisdictions/. You will see a progress bar below. If you didn't specify a type, it might take a while.

      heydan uses the Open Civic Identifiers format to structure file names and main identification for jurisdictions. This helps create a unique nonchanging identification code for every jurisdiction, based on the sponsoring parent. So, the State of Kansas, would be country:us/state:kansas. heydan creates a flat json file for each jurisdiction, which you can then import into your own application or elasticsearch.

      Next, run heydan sources sync
      )
      end

      def sources_add
        return unless HeyDan.help?
        puts %Q(
      You can leverage the community of developers out there. Add the github link to a source repo and tap into all that hardwork.

      When you add a new one, it will get stored in your settings file under 'sources'
      )
      end

      def sources_sync
        return unless HeyDan.help?
        puts %Q(
      Sync all the sources in your settings file.
      )
      end

      def sources_update
        return unless HeyDan.help?
        puts %Q(
      Update a single source.
      )
      end

      # No help text is defined for `sources build`; the method exists so the
      # name can still be called.
      def sources_build
      end

      # No help text is defined for `sources new`; the method exists so the
      # name can still be called.
      def sources_new
      end

      def git_clone(name)
        return unless HeyDan.help?
        puts %Q(Cloning #{name} into #{HeyDan.folders[:sources]})
      end

      def git_update(name)
        return unless HeyDan.help?
        puts %Q(Updating #{name} in #{HeyDan.folders[:sources]})
      end

      # Prints the message itself; it used to be wrapped in literal double
      # quotes because the quotes sat inside the %Q(...) delimiters.
      def build_identifier(identifier)
        return unless HeyDan.help?
        puts "building identifiers hash for #{identifier} to filenames, this might take a moment"
      end

      def import
        return unless HeyDan.help?
        puts "This will import all the files/contents from the jurisdictions folder into ElasticSearch. Make sure it's running!"
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
+ require 'digest'
2
+ require 'uri'
3
+ require 'open-uri'
4
+ require 'csv'
5
+
6
# Class-level helpers for downloading files and reading/writing CSV datasets.
#
# Folder locations come from HeyDan.folders (:downloads and :datasets).
# Declared in nested form (like the CLI file) so loading this file does not
# depend on HeyDan having been defined first.
module HeyDan
  class Helper
    class << self
      # Turns a snake_case name into CamelCase, e.g. 'school_district' ->
      # 'SchoolDistrict'.
      def classify(name)
        name.split('_').map(&:capitalize).join
      end

      # Returns the local path for +url+ inside the downloads folder. The file
      # name is the MD5 hex digest of the URL; the download only happens when
      # no file with that name exists yet.
      def download(url)
        new_file = File.join(HeyDan.folders[:downloads], md5_name(url))
        return new_file if File.exist?(new_file)
        download_file(url, new_file)
        new_file
      end

      # Writes +data+ (an enumerable of rows) to <datasets>/<name>.csv.
      def save_data(name, data)
        CSV.open(dataset_path(name), 'w') do |csv|
          data.each { |row| csv << row }
        end
      end

      # Reads <datasets>/<name>.csv and returns its rows as arrays.
      def get_data(name)
        CSV.read(dataset_path(name))
      end

      def dataset_exists?(name)
        File.exist?(dataset_path(name))
      end

      # Downloads +url+ and parses it according to the extension in the URL
      # path.
      #
      # Returns parsed rows for a csv, or for any other file whose first line
      # looks delimited; nil when the file does not look delimited. For a zip
      # holding several files it returns one element per file (nil for the
      # ones that do not look delimited).
      def get_data_from_url(url)
        ext = get_file_type_from_url(url)
        file = download(url)
        @data = case ext
                when 'csv'
                  get_csv_data(file)
                when 'zip'
                  files = unzip(file)
                  if files.size == 1
                    get_csv_data(files[0]) if is_csv?(files[0])
                  else
                    files.map { |f| get_csv_data(f) if is_csv?(f) }
                  end
                else
                  get_csv_data(file) if is_csv?(file)
                end
      end

      # Heuristic: true when the first line has a tab or a comma directly
      # after a word character. An empty file is not delimited data.
      def is_csv?(file_path)
        first_line = File.open(file_path, &:gets)
        return false if first_line.nil?
        # Drop invalid bytes first; matching a broken string raises.
        !first_line.scrub('').match(/\b[\t,]/).nil?
      end

      # Reads +file+ as UTF-8, drops invalid bytes and every double quote, and
      # parses the rest as tab-separated when it contains a tab, otherwise as
      # comma-separated.
      def get_csv_data(file)
        contents = File.read(file, :encoding => 'utf-8').scrub('').delete('"')
        if contents.include?("\t")
          # Keyword form: a positional options hash is rejected on Ruby 3.
          CSV.parse(contents, :col_sep => "\t")
        else
          CSV.parse(contents)
        end
      end

      def md5_name(text)
        Digest::MD5.hexdigest(text)
      end

      # Copies the content at +url+ to +file_path+ and returns the expanded
      # destination path. Uses the block form of URI.open (open-uri) so the
      # source handle is closed; plain Kernel#open stopped fetching URLs in
      # Ruby 3.0.
      def download_file(url, file_path)
        full_path = File.expand_path(file_path)
        URI.open(url) do |remote|
          File.open(full_path, 'wb') { |saved_file| saved_file.write(remote.read) }
        end
        full_path
      end

      # Extension of the URL's path without the dot; the query string is
      # ignored.
      def get_file_type_from_url(url)
        File.extname(URI.parse(url).path).delete('.')
      end

      # Extracts every entry of the zip +file+ into the downloads folder,
      # skipping files that already exist, and returns the extracted paths.
      def unzip(file)
        require 'zip' # loaded lazily, only when an archive is handled
        root = File.expand_path(HeyDan.folders[:downloads])
        files = []
        Zip::File.open(file) do |zip_file|
          zip_file.each do |entry|
            download_path = File.expand_path(File.join(root, entry.name))
            # Archives come from the network: refuse entry names such as
            # "../x" that would resolve outside the downloads folder.
            unless download_path.start_with?(root + File::SEPARATOR)
              warn "Skipping zip entry outside of #{root}: #{entry.name}"
              next
            end
            entry.extract(download_path) unless File.exist?(download_path)
            files << download_path
          end
        end
        files
      end

      private

      # Absolute path of the dataset +name+; '.csv' inside the name is removed
      # before the extension is appended.
      def dataset_path(name)
        File.expand_path(File.join(HeyDan.folders[:datasets], "#{name.gsub('.csv', '')}.csv"))
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'ruby-progressbar'
2
+ require 'elasticsearch'
3
+
4
# Imports the files in the jurisdictions folder into Elasticsearch.
#
# Declared in nested form (like the CLI file) so loading this file does not
# depend on HeyDan having been defined first.
module HeyDan
  class Import
    # Number of documents sent per bulk request unless overridden.
    BATCH_SIZE = 10_000

    attr_accessor :client

    class << self
      # Memoized Elasticsearch client built from HeyDan.elasticsearch[:url].
      def client
        @client ||= Elasticsearch::Client.new host: HeyDan.elasticsearch[:url], log: false
      end

      # Name of the index documents are written to.
      def index
        @index ||= 'jurisdictions'
      end

      def check_index
        client.indices.exists? index: index
      end

      def create_index
        client.indices.create index: index
      end

      # Creates the index when missing, then sends every file found directly
      # under the jurisdictions folder to Elasticsearch in bulk requests of at
      # most +batch_size+ documents, advancing a progress bar after each one.
      #
      # Slicing replaces the previous hand-computed ranges, which could end up
      # iterating a nil slice for some file counts and issued an empty bulk
      # request when the folder had no files.
      #
      # @param batch_size [Integer] documents per bulk request
      # @return [nil]
      def process(batch_size: BATCH_SIZE)
        create_index unless check_index
        files = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
        progress = ProgressBar.create(:title => "Importing #{files.size} jurisdictions into Elastic Search", :starting_at => 0, :total => files.size)
        files.each_slice(batch_size) do |batch|
          bulk = batch.map do |file|
            jf = HeyDan::JurisdictionFile.new(name: file)
            { index: { _index: index, _type: jf.type, _id: jf.hash_id, data: jf.get_json } }
          end
          client.bulk refresh: true, body: bulk
          progress.progress += batch.size
        end
        progress.finish
        nil
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'json'
2
+
3
# One jurisdiction, stored as a flat JSON file in the jurisdictions folder.
#
# The jurisdiction name is a slash-separated identifier such as
# 'country:us/state:ks'; on disk the slashes become '::' and '.json' is
# appended. Declared in nested form (like the CLI file) so loading this file
# does not depend on HeyDan having been defined first.
module HeyDan
  class JurisdictionFile
    # Top-level keys that add_property refuses to overwrite.
    RESERVED_KEYS = ['datasets', 'identifiers', 'id', 'entityType', 'attributes'].freeze

    attr_accessor :name
    attr_accessor :json

    # @param opts [Hash] :name is either an identifier or a path/file name
    #   ending in '.json'
    # @raise [RuntimeError] when :name is missing
    def initialize(opts={})
      @name = opts[:name]
      # Must run before any String method is called on @name.
      raise "Name is required" if @name.nil?
      convert_file_name if @name.include?('.json')
      @name = @name.gsub('jurisdictions/','').gsub('ocd-division/','').gsub(/\.\.\//,'')
    end

    # The part before ':' in the last path segment of the name, e.g. 'state'.
    def type
      @name.split('/')[-1].split(':')[0]
    end

    def id
      get_json['id']
    end

    # True when +ocd_type+ is nil or when the id matches it as a regular
    # expression, with ':all' standing for '.+'.
    def match_type?(ocd_type)
      return true if ocd_type.nil?
      !id.match(/#{ocd_type.gsub(':all', '.+')}/).nil?
    end

    # Turns a file name/path back into an identifier: keeps the last path
    # segment, maps '::' to '/' and removes '.json'.
    def convert_file_name
      @name = @name.split('/')[-1]
      @name = "#{@name.gsub('::','/').gsub('.json','')}"
    end

    def file_name
      "#{@name.gsub(/\//, '::').gsub('ocd-division::', '')}.json"
    end

    def hash_id
      HeyDan::Helper.md5_name(@name.gsub(/\.\.\//,''))
    end

    def exists?
      File.exist?(file_path)
    end

    def file_path
      File.expand_path(File.join(HeyDan.folders[:jurisdictions], file_name))
    end

    # Document used when there is no file yet or the file is empty.
    def initial_json
      {'id' => @name, 'entityType' => type, 'attributes'=> {}, 'identifiers' => {}, 'datasets' => []}
    end

    def get_identifier(key)
      get_json['identifiers'][key]
    end

    def add_identifier(key, value)
      get_json['identifiers'][key] = value
      @json
    end

    def add_dataset(value)
      get_json['datasets'] << value
      @json
    end

    # First dataset whose 'id' equals +key+, or nil.
    def get_dataset(key)
      get_json['datasets'].select { |d| d['id']==key}[0]
    end

    def datasets
      get_json['datasets']
    end

    # Sets a top-level key. Returns false without changing anything when +key+
    # is one of RESERVED_KEYS, otherwise the whole document.
    def add_property(key, value)
      return false if RESERVED_KEYS.include?(key)
      get_json[key] = value
      @json
    end

    def add_attribute(key, value)
      get_json['attributes'][key] = value
      @json
    end

    def get_attribute(key)
      get_json['attributes'][key]
    end

    # Returns the in-memory document, loading it on first use: parsed from the
    # file when it exists and is not empty, otherwise initial_json. The file
    # is not read again once a document is loaded.
    def get_json
      return @json if @json
      contents = exists? ? File.read(file_path) : ''
      @json = contents == '' ? initial_json : JSON.parse(contents)
    end

    # Writes the document to file_path. The document is loaded first so that
    # saving an untouched instance does not overwrite the file with "null".
    def save
      get_json
      File.open(file_path, 'w') do |f|
        f.write(@json.to_json)
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'ruby-progressbar'
2
+
3
# Downloads the Open Civic Data division identifiers for the US and writes
# one jurisdiction file per identifier.
class HeyDan::OpenCivicIdentifiers
  include HeyDan

  class << self
    attr_accessor :jurisdiction_type
    attr_accessor :jurisdictions_folder
    attr_accessor :data

    # Dataset name used when saving/loading the identifier list.
    # Note that this replaces the class's own name on the singleton.
    def name
      'open_civic_data'
    end

    # Entry point: reads the :type option, prepares settings and folders,
    # fetches the identifier list when needed and builds the files.
    # +opts+ is accepted but not used.
    def build(opts={})
      @jurisdiction_type = HeyDan.options[:type]
      HeyDan::Base.load_or_create_settings
      HeyDan::Base.create_folders
      @jurisdictions_folder = HeyDan.folders[:jurisdictions]
      download
      build_jurisdiction_files
    end

    # Fetches the identifier CSV and stores its first two columns, under an
    # 'id'/'name' header row, as the dataset. Does nothing when the dataset
    # is already saved.
    def download
      return if HeyDan::Helper.dataset_exists?(name)

      @data = HeyDan::Helper.get_data_from_url('https://github.com/opencivicdata/ocd-division-ids/blob/master/identifiers/country-us.csv?raw=true')
      @data = @data[1..-1].map { |columns| columns.values_at(0, 1) }
      @data.unshift(['id', 'name'])
      HeyDan::Helper.save_data(name, @data)
    end

    # Creates/updates a jurisdiction file for every data row (the header row
    # is skipped) whose id matches the requested jurisdiction type. A progress
    # bar is shown only while help is enabled.
    def build_jurisdiction_files
      @data = HeyDan::Helper.get_data(name) if @data.nil?

      if HeyDan.help?
        @progress = ProgressBar.create(:title => "Building Files in #{HeyDan.folders[:jurisdictions]} for jurisdictions #{('matching ' + @jurisdiction_type) if @jurisdiction_type}", :starting_at => 0, :total => @data[1..-1].size)
      end

      @data[1..-1].each_with_index do |row, position|
        jurisdiction = HeyDan::JurisdictionFile.new(name: row[0])
        next unless jurisdiction.match_type?(@jurisdiction_type)

        jurisdiction.add_identifier('open_civic_id', row[0].gsub('ocd-division/',''))
        jurisdiction.add_property('name', row[1])
        jurisdiction.save
        @progress.progress = position if HeyDan.help?
      end

      @progress.finish if HeyDan.help?
    end
  end
end