heydan 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/heydan/cli.rb ADDED
@@ -0,0 +1,80 @@
1
+ require 'thor'
2
+
3
module HeyDan

  # Thor command group mounted on the main CLI as `heydan sources ...`.
  #
  # Every command first prints its help text through HeyDan.helper_text and
  # then delegates to the HeyDan::Sources class method of the same purpose.
  class Source < Thor

    desc "sync", "sync your sources folders from the settings file"
    def sync
      HeyDan::helper_text('sources_sync')
      HeyDan::Sources.sync
    end

    desc "add GITHUB_LINK", "Add a new folder of sources from github"
    def add(github_link)
      HeyDan::helper_text('sources_add')
      HeyDan::Sources.add(github_link)
    end

    desc "update NAME", "update a folder of sources"
    def update(name)
      HeyDan::helper_text('sources_update')
      HeyDan::Sources.update(name)
    end

    # The usage banner names the second argument NAME so it agrees with both
    # the description and the method signature.
    desc "new FOLDER NAME VARIABLE", "adds a new source NAME in the FOLDER, and an optional VARIABLE"
    def new(folder, name, variable=nil)
      HeyDan::helper_text('sources_new')
      HeyDan::Sources.create(folder, name, variable)
    end

    # The option is declared as :fromsource, so the switch Thor generates is
    # --fromsource; the description advertises that exact spelling.
    option :fromsource, type: :boolean
    desc "build FOLDER NAME VARIABLE", "builds a source's variables in FOLDER, or optional VARIABLE. You can pass --type school_district for a specific jurisdiction type, or --fromsource to build original files"
    def build(folder=nil, name=nil, variable=nil)
      HeyDan::helper_text('sources_build')
      # Parsed command-line options are shared through HeyDan.options.
      HeyDan.options = options
      HeyDan::Sources.build(folder, name, variable)
    end
  end

  # Top-level `heydan` command line interface.
  class Cli < Thor
    class_option 'type'

    desc "setup DIR", "Setups HeyDan in the current directory or specified DIR"
    def setup(dir=nil)
      HeyDan::helper_text('setup')
      HeyDan::Base.setup(dir)
    end

    desc "build", "Builds jurisdiction files"
    def build
      HeyDan::helper_text('build')
      # Parsed command-line options are shared through HeyDan.options.
      HeyDan.options = options
      HeyDan::OpenCivicIdentifiers.build
    end

    desc 'import', "Imports files into elasticsearch"
    def import
      HeyDan::helper_text('import')
      HeyDan::Import.process
    end

    desc "sources SUBCOMMAND ...ARGS", "manage sources"
    subcommand "sources", Source

    desc "server", "starts up the webserver for heydan"
    def server
      puts "Serving up some HeyDan Realness"
      HeyDan::Server.run!
    end

    # Delegates to HeyDan::Cdn.upload. The description and banner used to be
    # copies of the `server` command's text.
    desc "upload", "uploads heydan files to the CDN"
    def upload
      puts "Uploading HeyDan files to the CDN"
      HeyDan::Cdn.upload
    end

  end

end
@@ -0,0 +1,83 @@
1
module HeyDan
  # Console help messages for the heydan commands.
  #
  # Every method prints to standard output and prints nothing when
  # HeyDan.help? is false. The indentation inside the multi-line messages is
  # part of the printed text.
  class HelpText
    class << self
      # Printed by `heydan setup`. +opts+ is accepted but not used.
      def setup(opts={})
        return unless HeyDan.help?
        # The export example has no spaces around "=" so it is valid shell.
        puts %Q(
        Hi! Adding a jurisdictions, datasets, downloads and sources directory and a settings.yml file. If you want to move these directories to other places, just update their locations in the settings file.

        If you want to run heydan from a different folder than the settings.yml, create an environment variable:

        export HEYDAN_SETTINGS=full/path/to/settings.yml

        To turn off this help, run 'heydan help off' or set the help in settings to false.

        heydan grabs datasets and information about jurisdictions. If you want to focus on just one type of jurisdiction, update the settings 'jurisdiction_type'. Or you can pass --type school_district to any heydan command.

        Next, run `heydan build` to setup your files.
      )
      end

      # Printed by `heydan build`. opts[:type] names the jurisdiction type
      # shown in the message and defaults to 'all'.
      def build(opts={})
        return unless HeyDan.help?
        type = opts[:type] || 'all'
        puts %Q(
        Woot, building files for type #{type} jurisdictions/. You will see a progress bar below. If you didn't specify a type, it might take a while.

        heydan uses the Open Civic Identifiers format to structure file names and main identification for jurisdictions. This helps create a unique nonchanging identification code for every jurisdiction, based on the sponsoring parent. So, the State of Kansas, would be country:us/state:kansas. heydan creates a flat json file for each jurisdiction, which you can then import into your own application or elasticsearch.

        Next, run heydan sources sync
      )
      end

      # Printed by `heydan sources add`.
      def sources_add
        return unless HeyDan.help?
        puts %Q(
        You can leverage the community of developers out there. Add the github link to a source repo and tap into all that hardwork.

        When you add a new one, it will get stored in your settings file under 'sources'
      )
      end

      # Printed by `heydan sources sync`.
      def sources_sync
        return unless HeyDan.help?
        puts %Q(
        Sync all the sources in your settings file.
      )
      end

      # Printed by `heydan sources update`.
      def sources_update
        return unless HeyDan.help?
        puts %Q(
        Update a single source.
      )
      end

      # No help text is defined for `heydan sources build`.
      def sources_build
      end

      # No help text is defined for `heydan sources new`.
      def sources_new
      end

      # Announces that the source repository +name+ is being cloned.
      def git_clone(name)
        return unless HeyDan.help?
        puts %Q(Cloning #{name} into #{HeyDan.folders[:sources]})
      end

      # Announces that the source repository +name+ is being updated.
      def git_update(name)
        return unless HeyDan.help?
        puts %Q(Updating #{name} in #{HeyDan.folders[:sources]})
      end

      # Announces the build of the lookup hash for +identifier+.
      # The message is printed without the surrounding double quotes that the
      # earlier %Q("...") literal emitted verbatim.
      def build_identifier(identifier)
        return unless HeyDan.help?
        puts "building identifiers hash for #{identifier} to filenames, this might take a moment"
      end

      # Printed by `heydan import`.
      def import
        return unless HeyDan.help?
        puts "This will import all the files/contents from the jurisdictions folder into ElasticSearch. Make sure it's running!"
      end

    end

  end
end
@@ -0,0 +1,108 @@
1
+ require 'digest'
2
+ require 'uri'
3
+ require 'open-uri'
4
+ require 'csv'
5
+
6
module HeyDan
  # Stateless helpers for fetching source files and for reading and writing
  # the CSV datasets stored under HeyDan.folders.
  class Helper

    class << self

      # Converts a snake_case name to CamelCase ("school_district" becomes
      # "SchoolDistrict"). Each segment is capitalized, which also lowercases
      # the rest of the segment.
      def classify(name)
        name.split('_').map(&:capitalize).join
      end

      # Returns the path of the cached copy of +url+ inside the downloads
      # folder, fetching it first when no cached copy exists. The cache file
      # is named after the MD5 of the URL.
      def download(url)
        new_file = File.join(HeyDan.folders[:downloads], md5_name(url))
        return new_file if File.exist?(new_file)
        download_file(url, new_file)
        new_file
      end

      # Writes +data+ (an enumerable of rows) as CSV into the datasets
      # folder. +name+ may be given with or without the ".csv" suffix.
      def save_data(name, data)
        full_path = File.expand_path(dataset_path(name))
        CSV.open(full_path, 'w') do |csv|
          data.each { |row| csv << row }
        end
      end

      # Reads the dataset +name+ back as an array of rows.
      def get_data(name)
        CSV.read(dataset_path(name))
      end

      # True when a dataset file for +name+ is present in the datasets folder.
      def dataset_exists?(name)
        File.exist?(dataset_path(name))
      end

      # Downloads +url+ and parses it as delimited text.
      #
      # Returns an array of rows, or an array with one entry per file for a
      # zip holding several files. Files that do not look delimited yield nil.
      # The result is also kept in @data on this class.
      def get_data_from_url(url)
        ext = get_file_type_from_url(url)
        file = download(url)
        @data =
          case ext
          when 'csv'
            get_csv_data(file)
          when 'zip'
            files = unzip(file)
            if files.size == 1
              get_csv_data(files[0]) if is_csv?(files[0])
            else
              files.map { |f| get_csv_data(f) if is_csv?(f) }
            end
          else
            # 'txt' and unknown extensions are parsed only when the first
            # line looks delimited.
            get_csv_data(file) if is_csv?(file)
          end
      end

      # Heuristic: the first line of the file has a tab or a comma directly
      # after a word character. Returns false for an empty file.
      #
      # The earlier check was inverted: it accepted lines with no delimiter
      # at all and rejected lines holding both a tab and a comma.
      def is_csv?(file_path)
        first_line = File.open(file_path, &:gets)
        return false if first_line.nil?
        # scrub keeps the regexp from raising on invalid byte sequences.
        !(first_line.scrub('') =~ /\b[\t,]/).nil?
      end

      # Parses +file+ as tab-separated text when it contains a tab, otherwise
      # as comma-separated text. Invalid bytes and every double quote are
      # removed before parsing.
      def get_csv_data(file)
        contents = File.read(file, :encoding => 'utf-8').encode("UTF-8", :invalid=>:replace, :replace=>"").gsub('"',"")

        if contents.include?("\t")
          # Keyword form: a positional options hash is rejected on Ruby 3.
          CSV.parse(contents, col_sep: "\t")
        else
          CSV.parse(contents)
        end
      end

      # Hex MD5 digest of +text+.
      def md5_name(text)
        Digest::MD5.hexdigest(text)
      end

      # Copies the resource at +url+ to +file_path+ and returns the expanded
      # destination path.
      #
      # The URL is parsed before anything is opened, so a string such as
      # "| command" raises URI::InvalidURIError instead of being run by
      # Kernel#open. URIs that open-uri can fetch are read through it; any
      # other value is treated as a local file path.
      def download_file(url, file_path)
        full_path = File.expand_path(file_path)
        source = URI.parse(url)
        # Block form closes the remote handle as soon as the body is read.
        body = source.respond_to?(:open) ? source.open(&:read) : File.binread(url)
        File.open(full_path, 'wb') do |saved_file|
          saved_file.write(body)
        end
        full_path
      end

      # Extension of the URL's path without the dot ("csv"), or "" when the
      # path has none. The query string is ignored.
      def get_file_type_from_url(url)
        File.extname(URI.parse(url).path).delete('.')
      end

      # Extracts every entry of the zip +file+ into the downloads folder and
      # returns the extracted paths. Entries already present are not
      # overwritten. Entries whose name would resolve outside the downloads
      # folder (for example "../x") are skipped.
      def unzip(file)
        require 'zip' # loaded lazily: only zip downloads need rubyzip
        root = File.expand_path(HeyDan.folders[:downloads])
        files = []
        Zip::File.open(file) do |zip_file|
          zip_file.each do |entry|
            download_path = File.expand_path(File.join(root, entry.name))
            next unless download_path.start_with?(root + File::SEPARATOR)
            entry.extract(download_path) unless File.exist?(download_path)
            files << download_path
          end
        end
        files
      end

      private

      # Path of the dataset +name+ in the datasets folder, always ending in
      # exactly one ".csv".
      def dataset_path(name)
        File.join(HeyDan.folders[:datasets], "#{name.gsub('.csv', '')}.csv")
      end

    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'ruby-progressbar'
2
+ require 'elasticsearch'
3
+
4
module HeyDan
  # Bulk-loads the jurisdiction files into Elasticsearch.
  class Import
    attr_accessor :client

    class << self
      # Memoized Elasticsearch client built from HeyDan.elasticsearch[:url].
      def client
        @client ||= Elasticsearch::Client.new host: HeyDan.elasticsearch[:url], log: false
      end

      # Name of the index the jurisdictions are written to.
      def index
        @index ||= 'jurisdictions'
      end

      # True when the index already exists.
      def check_index
        client.indices.exists? index: index
      end

      # Creates the index.
      def create_index
        client.indices.create index: index
      end

      # Imports every file in the jurisdictions folder, +batch_size+ files
      # per bulk request, creating the index first when it is missing.
      # Returns nil.
      #
      # Batches come from each_slice, so no request is sent for an empty
      # folder and a file count that is an exact multiple of the batch size
      # is handled like any other.
      def process(batch_size = 10_000)
        create_index unless check_index
        files = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
        progress = ProgressBar.create(:title => "Importing #{files.size} jurisdictions into Elastic Search", :starting_at => 0, :total => files.size)
        files.each_slice(batch_size) do |batch|
          # Goes through the memoized accessor rather than @client, which is
          # only set once `client` has been called.
          client.bulk refresh: true, body: bulk_actions(batch)
          progress.progress += batch.size
        end
        progress.finish
        nil
      end

      private

      # Builds one bulk "index" action per jurisdiction file.
      def bulk_actions(files)
        files.map do |file|
          jf = HeyDan::JurisdictionFile.new(name: file)
          { index: { _index: index, _type: jf.type, _id: jf.hash_id, data: jf.get_json } }
        end
      end
    end
  end
end
@@ -0,0 +1,119 @@
1
+ require 'json'
2
+
3
module HeyDan
  # One jurisdiction's flat JSON document, addressed by a slash-separated
  # name such as "country:us/state:kansas".
  #
  # On disk the name's "/" separators become "::" and the file lives in
  # HeyDan.folders[:jurisdictions].
  class JurisdictionFile
    # Top-level keys that make up the document structure; add_property
    # refuses to overwrite them.
    RESERVED_KEYS = %w[datasets identifiers id entityType attributes].freeze

    attr_accessor :name
    attr_accessor :json

    # opts[:name] is either a jurisdiction name or a path to a ".json" file.
    # The "jurisdictions/" and "ocd-division/" prefixes and any "../" are
    # stripped from it.
    #
    # Raises RuntimeError when no name is given. The check runs before the
    # name is inspected, so a missing name no longer surfaces as a
    # NoMethodError on nil.
    def initialize(opts={})
      @name = opts[:name]
      raise "Name is required" if @name.nil?
      convert_file_name if @name.include?('.json')
      @name = @name.gsub('jurisdictions/','').gsub('ocd-division/','').gsub(/\.\.\//,'')
    end

    # Entity type: the part before ":" in the last name segment
    # ("state" for "country:us/state:kansas").
    def type
      @name.split('/')[-1].split(':')[0]
    end

    # The 'id' stored in the document.
    def id
      get_json['id']
    end

    # True when +ocd_type+ is nil or matches the id as a regular expression;
    # a ":all" suffix acts as a wildcard.
    def match_type?(ocd_type)
      return true if ocd_type.nil?
      !id.match(/#{ocd_type.gsub(':all', '.+')}/).nil?
    end

    # Turns a file path back into a name: keeps the basename, swaps "::"
    # for "/" and drops ".json".
    def convert_file_name
      @name = @name.split('/')[-1]
      @name = "#{@name.gsub('::','/').gsub('.json','')}"
    end

    # File name used on disk for this jurisdiction.
    def file_name
      "#{@name.gsub(/\//, '::').gsub('ocd-division::', '')}.json"
    end

    # MD5 of the name, used as a stable document id.
    def hash_id
      HeyDan::Helper.md5_name(@name.gsub(/\.\.\//,''))
    end

    # True when the jurisdiction file is present on disk.
    def exists?
      File.exist?(file_path)
    end

    # Absolute path of the jurisdiction file.
    def file_path
      File.expand_path(File.join(HeyDan.folders[:jurisdictions], file_name))
    end

    # Skeleton document used when nothing is stored yet.
    def initial_json
      {'id' => @name, 'entityType' => type, 'attributes'=> {}, 'identifiers' => {}, 'datasets' => []}
    end

    # Value stored under +key+ in 'identifiers'.
    def get_identifier(key)
      get_json['identifiers'][key]
    end

    # Sets an identifier and returns the whole document.
    def add_identifier(key, value)
      get_json['identifiers'][key] = value
      @json
    end

    # Appends a dataset entry and returns the whole document.
    def add_dataset(value)
      get_json['datasets'] << value
      @json
    end

    # First dataset whose 'id' equals +key+, or nil.
    def get_dataset(key)
      get_json['datasets'].find { |d| d['id'] == key }
    end

    # All dataset entries.
    def datasets
      get_json['datasets']
    end

    # Sets a top-level property and returns the whole document. Returns false
    # without changing anything when +key+ is one of RESERVED_KEYS.
    def add_property(key, value)
      return false if RESERVED_KEYS.include?(key)
      get_json[key] = value
      @json
    end

    # Sets an attribute and returns the whole document.
    def add_attribute(key, value)
      get_json['attributes'][key] = value
      @json
    end

    # Value stored under +key+ in 'attributes'.
    def get_attribute(key)
      get_json['attributes'][key]
    end

    # Returns the document, loading it on first use. A missing or blank file
    # yields initial_json. The file is read at most once per instance.
    def get_json
      return @json if @json
      contents = exists? ? File.read(file_path) : ''
      @json = contents.strip.empty? ? initial_json : JSON.parse(contents)
    end

    # Writes the document to file_path. The document is loaded before the
    # file is opened for writing, so saving an instance that was never read
    # keeps the stored content instead of writing "null" over it.
    def save
      payload = get_json.to_json
      File.open(file_path, 'w') do |f|
        f.write(payload)
      end
    end

  end
end
@@ -0,0 +1,51 @@
1
+ require 'ruby-progressbar'
2
+
3
module HeyDan
  # Builds one jurisdiction file per row of the Open Civic Data division
  # identifier list.
  class OpenCivicIdentifiers
    include HeyDan

    class << self
      attr_accessor :jurisdiction_type
      attr_accessor :jurisdictions_folder
      attr_accessor :data

      # Dataset name used for the cached CSV.
      # NOTE(review): this replaces Module#name on this class.
      def name
        'open_civic_data'
      end

      # Prepares settings and folders, fetches the identifier list when it is
      # not cached, then writes the jurisdiction files.
      #
      # The jurisdiction type filter comes from opts[:type] when given and
      # otherwise from HeyDan.options[:type]; an unset HeyDan.options means
      # no filter.
      def build(opts={})
        @jurisdiction_type = opts[:type] || (HeyDan.options || {})[:type]
        HeyDan::Base.load_or_create_settings
        HeyDan::Base.create_folders
        @jurisdictions_folder = HeyDan.folders[:jurisdictions]
        download
        build_jurisdiction_files
      end

      # Downloads the identifier CSV, keeps its first two columns under an
      # "id,name" header and saves it as the dataset. Does nothing when the
      # dataset is already cached.
      def download
        return if HeyDan::Helper.dataset_exists?(name)
        rows = HeyDan::Helper.get_data_from_url('https://github.com/opencivicdata/ocd-division-ids/blob/master/identifiers/country-us.csv?raw=true')
        # drop(1) removes the source header and copes with an empty list.
        @data = rows.drop(1).map { |c| [c[0], c[1]] }
        @data.unshift(['id', 'name'])
        HeyDan::Helper.save_data(name, @data)
      end

      # Writes a jurisdiction file for every data row matching the type
      # filter, storing the row's id as the 'open_civic_id' identifier and
      # its name as the 'name' property.
      #
      # A progress bar is shown when HeyDan.help? is true. It advances once
      # per row, including rows skipped by the filter.
      def build_jurisdiction_files
        @data = HeyDan::Helper.get_data(name) if @data.nil?
        rows = @data.drop(1) # the first row is the header
        show_progress = HeyDan.help?
        @progress = ProgressBar.create(:title => "Building Files in #{HeyDan.folders[:jurisdictions]} for jurisdictions #{('matching ' + @jurisdiction_type) if @jurisdiction_type}", :starting_at => 0, :total => rows.size) if show_progress
        rows.each_with_index do |row, i|
          jf = HeyDan::JurisdictionFile.new(name: row[0])
          if jf.match_type?(@jurisdiction_type)
            jf.add_identifier('open_civic_id', row[0].gsub('ocd-division/',''))
            jf.add_property('name', row[1])
            jf.save
          end
          @progress.progress = i + 1 if show_progress
        end
        @progress.finish if show_progress
      end
    end

  end
end