morph-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTg4MTEwYzQ4ZDMyMWVkNjg1ZDk4MDY0YzllZTQ1ODhjOTFlOWQ3MA==
5
+ data.tar.gz: !binary |-
6
+ Y2U0NGJkMDU5MTdhNWNmMjlhYTc3ZTg4ZjI3ZmVkNDllNjdhNjQ5MQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZWE1NDAwYjk4YmIxMmMwMjk0ZDQ0YzliZjExNGZhMjQzZDBhNGMyOWQ0ODQ5
10
+ NjY2YTkwNDRlZGIwYWRkYzEwOTk0NWNiYzdiYWMyMjI2ZTE3NDQ5YWY5OWUz
11
+ NTU0N2Q3MTg2OGU3OTdlODU4MDdiMzUyNzYzNDg0ZTMzZjlhMTg=
12
+ data.tar.gz: !binary |-
13
+ MGM4Y2VkMWFkMmYyMzQ0ZmY0MzBmOTQ4MTBhMzVmYmE5MDdjNzhhYzJmNzQy
14
+ YTAwZDAwOTAwYzY4M2Q3YmM3OWY3ODRiYzBjZTJiZjQzNDFmYjFlZjAxODYw
15
+ NzU1MTJkZGMyN2ZmNThiZDZiNWJiOTU5ZjkxNDA3MzI1OTI2ZDM=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in morph-cli.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 OpenAustralia Foundation
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Morph
2
+
3
+ Command line interface for Morph.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'morph-cli'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install morph-cli
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/morph ADDED
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env ruby
2
+ # Commandline client for controlling morph and running scrapers and things
3
+
4
+ require "thor"
5
+ require "rest_client"
6
+ # TODO Do compression on the tar file
7
+ #require 'zlib'
8
+ require 'archive/tar/minitar'
9
+ require 'pathname'
10
+ require 'json'
11
+
12
+ class MorphThor < Thor
13
+ class_option :dev, default: false, type: :boolean, desc: "Run against a local dev of Morph running at http://localhost:3000"
14
+
15
+ desc "[execute]", "execute morph scraper"
16
+ option :directory, :default => Dir.getwd
17
+
18
+ def execute
19
+ api_key = retrieve_api_key
20
+ if api_key.nil?
21
+ api_key = ask("What is your key? (Go to #{base_url(options)}/settings)")
22
+ save_api_key(api_key)
23
+ end
24
+
25
+ api_key_is_valid = false
26
+ until api_key_is_valid
27
+ begin
28
+ puts "Uploading and running..."
29
+ file = create_tar(options[:directory], all_paths(options[:directory]))
30
+ result = RestClient.post("#{base_url(options)}/run", :api_key => api_key, :code => file)
31
+ api_key_is_valid = true
32
+ rescue RestClient::Unauthorized
33
+ puts "Your key isn't working. Let's try again."
34
+ api_key = ask("What is your key? (Go to #{base_url(options)}/settings)")
35
+ save_api_key(api_key)
36
+ end
37
+ end
38
+ # Interpret each line separately as json
39
+ result.split("\n").each do |line|
40
+ a = JSON.parse(line)
41
+ if a["stream"] == "stdout"
42
+ s = $stdout
43
+ elsif a["stream"] == "stderr"
44
+ s = $stderr
45
+ else
46
+ raise "Unknown stream"
47
+ end
48
+ s.puts a["text"]
49
+ end
50
+ end
51
+
52
+ no_commands {
53
+ def base_url(options)
54
+ if options[:dev]
55
+ "http://127.0.0.1:3000"
56
+ else
57
+ "https://morph.io"
58
+ end
59
+ end
60
+
61
+ def config_path
62
+ File.join(Dir.home, ".morph")
63
+ end
64
+
65
+ def save_api_key(api_key)
66
+ configuration = {api_key: api_key}
67
+ File.open(config_path, "w") {|f| f.write configuration.to_yaml}
68
+ File.chmod(0600, config_path)
69
+ end
70
+
71
+ def retrieve_api_key
72
+ if File.exists?(config_path)
73
+ YAML.load(File.read(config_path))[:api_key]
74
+ end
75
+ end
76
+
77
+ # TODO Temporary file should be named differently every time
78
+ def create_dir_tar(directory)
79
+ in_directory(directory) do
80
+ tempfile = File.new('/tmp/out', 'wb')
81
+ Archive::Tar::Minitar.pack('.', tempfile)
82
+ File.new('/tmp/out', 'r')
83
+ end
84
+ end
85
+
86
+ def in_directory(directory)
87
+ cwd = FileUtils.pwd
88
+ FileUtils.cd(directory)
89
+ yield
90
+ ensure
91
+ FileUtils.cd(cwd)
92
+ end
93
+
94
+ def create_tar(directory, paths)
95
+ tempfile = File.new('/tmp/out', 'wb')
96
+
97
+ in_directory(directory) do
98
+ begin
99
+ tar = Archive::Tar::Minitar::Output.new("/tmp/out")
100
+ paths.each do |entry|
101
+ Archive::Tar::Minitar.pack_file(entry, tar)
102
+ end
103
+ ensure
104
+ tar.close
105
+ end
106
+ end
107
+ File.new('/tmp/out', 'r')
108
+ end
109
+
110
+ # Relative paths to all the files in the given directory (recursive)
111
+ # (except for anything below a directory starting with ".")
112
+ def all_paths(directory)
113
+ result = []
114
+ Find.find(directory) do |path|
115
+ if FileTest.directory?(path)
116
+ if File.basename(path)[0] == ?.
117
+ Find.prune
118
+ end
119
+ else
120
+ result << Pathname.new(path).relative_path_from(Pathname.new(directory)).to_s
121
+ end
122
+ end
123
+ result
124
+ end
125
+
126
+ # Relative path of database file (if it exists)
127
+ def database_path(directory)
128
+ path = "data.sqlite"
129
+ path if File.exists?(File.join(directory, path))
130
+ end
131
+ }
132
+
133
+ end
134
+
135
+ # If morph is run without any parameters it's the same as "morph execute"
136
+ MorphThor.start(ARGV.empty? ? ["execute"] : ARGV)
data/lib/morph-cli.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "morph-cli/version"
2
+
3
+ module MorphCLI
4
+ # Namespace for the gem. It only carries VERSION (required above); the command line client is defined in bin/morph.
5
+ end
@@ -0,0 +1,3 @@
1
+ module MorphCLI
2
+ VERSION = "0.0.1" # gem version; morph-cli.gemspec uses it as spec.version
3
+ end
data/morph-cli.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'morph-cli/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "morph-cli"
8
+ spec.version = MorphCLI::VERSION
9
+ spec.authors = ["Matthew Landauer"]
10
+ spec.email = ["matthew@oaf.org.au"]
11
+ spec.description = %q{Command line interface for Morph}
12
+ spec.summary = %q{Command line interface for Morph}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "thor"
22
+ spec.add_dependency "rest-client"
23
+ spec.add_dependency 'archive-tar-minitar'
24
+
25
+ spec.add_development_dependency "bundler", "~> 1.3"
26
+ spec.add_development_dependency "rake"
27
+
28
+ spec.executables = %w(morph)
29
+ end
data/scraper.rb ADDED
@@ -0,0 +1,71 @@
1
+ require 'scraperwiki'
2
+ require 'mechanize'
3
+ require 'logger'
4
+
5
+ starting_url = 'https://www2.bmcc.nsw.gov.au/datracking/Modules/applicationmaster/default.aspx?page=exhibit' # first page fetched by the scraper
6
+ comment_url = 'http://www.bmcc.nsw.gov.au/sustainableliving/developmentapplicationsinnotification' # stored unchanged in every record's 'comment_url' field
7
+
8
+ def clean_whitespace(a)
9
+ a.gsub("\r", ' ').gsub("\n", ' ').squeeze(" ").strip
10
+ end
11
+
12
+ def scrape_table(doc, comment_url)
13
+ doc.search('table tbody tr').each do |tr|
14
+ # Columns in table
15
+ # Show Number Exhibit Start Exhibit End Details Village
16
+ tds = tr.search('td')
17
+ h = tds.map{|td| td.inner_html}
18
+
19
+ record = {
20
+ 'info_url' => (doc.uri + tds[0].at('a')['href']).to_s,
21
+ 'comment_url' => comment_url,
22
+ 'council_reference' => clean_whitespace(h[1]),
23
+ 'on_notice_from' => Date.strptime(clean_whitespace(h[2]),"%d/%m/%Y").to_s,
24
+ 'on_notice_to' => Date.strptime(clean_whitespace(h[3]), "%d/%m/%Y").to_s,
25
+ 'address' => clean_whitespace(h[4].split('<br>')[0]) + ", " + clean_whitespace(h[5]) + ", NSW",
26
+ 'description' => clean_whitespace(h[4].split('<br>')[1..-1].join),
27
+ 'date_scraped' => Date.today.to_s
28
+ }
29
+
30
+ #p record
31
+ if (ScraperWiki.select("* from data where `council_reference`='#{record['council_reference']}'").empty? rescue true)
32
+ p record
33
+ ScraperWiki.save_sqlite(['council_reference'], record)
34
+ else
35
+ puts "Skipping already saved record " + record['council_reference']
36
+ end
37
+ end
38
+ end
39
+
40
+ def scrape_and_follow_next_link(doc, comment_url)
41
+ scrape_table(doc, comment_url)
42
+ nextButton = doc.at('.rgPageNext')
43
+ unless nextButton['onclick'] =~ /return false/
44
+ form = doc.forms.first
45
+
46
+ # The joy of dealing with ASP.NET
47
+ form['__EVENTTARGET'] = nextButton['name']
48
+ form['__EVENTARGUMENT'] = ''
49
+ # It doesn't seem to work without these stupid values being set.
50
+ # Would be good to figure out where precisely in the javascript these values are coming from.
51
+ form['ctl00%24RadScriptManager1']=
52
+ 'ctl00%24cphContent%24ctl00%24ctl00%24cphContent%24ctl00%24Radajaxpanel2Panel%7Cctl00%24cphContent%24ctl00%24ctl00%24RadGrid1%24ctl00%24ctl03%24ctl01%24ctl10'
53
+ form['ctl00_RadScriptManager1_HiddenField']=
54
+ '%3B%3BSystem.Web.Extensions%2C%20Version%3D3.5.0.0%2C%20Culture%3Dneutral%2C%20PublicKeyToken%3D31bf3856ad364e35%3Aen-US%3A0d787d5c-3903-4814-ad72-296cea810318%3Aea597d4b%3Ab25378d2%3BTelerik.Web.UI%2C%20Version%3D2009.1.527.35%2C%20Culture%3Dneutral%2C%20PublicKeyToken%3D121fae78165ba3d4%3Aen-US%3A1e3fef00-f492-4ed8-96ce-6371bc241e1c%3A16e4e7cd%3Af7645509%3A24ee1bba%3Ae330518b%3A1e771326%3Ac8618e41%3A4cacbc31%3A8e6f0d33%3Aed16cbdc%3A58366029%3Aaa288e2d'
55
+ doc = form.submit(form.button_with(:name => nextButton['name']))
56
+ scrape_and_follow_next_link(doc, comment_url)
57
+ end
58
+ end
59
+
60
+ #ScraperWiki.save_metadata('authority_name', 'Blue Mountains City Council')
61
+ #ScraperWiki.save_metadata('authority_short', 'Blue Mountains')
62
+ #ScraperWiki.save_metadata('state', 'NSW')
63
+
64
+ # Using Mechanize to grab the page because ScraperWiki.scrape bombed out on me
65
+ agent = Mechanize.new do |a|
66
+ a.verify_mode = OpenSSL::SSL::VERIFY_NONE # NOTE(review): turns off TLS certificate verification for this agent - confirm this is still needed
67
+ end
68
+
69
+ doc = agent.get(starting_url) # fetch the first listing page
70
+ scrape_and_follow_next_link(doc, comment_url) # scrape it and every following page
71
+
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: morph-cli
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Matthew Landauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: thor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rest-client
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: archive-tar-minitar
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Command line interface for Morph
84
+ email:
85
+ - matthew@oaf.org.au
86
+ executables:
87
+ - morph
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - .gitignore
92
+ - Gemfile
93
+ - Gemfile.lock
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - bin/morph
98
+ - lib/morph-cli.rb
99
+ - lib/morph-cli/version.rb
100
+ - morph-cli.gemspec
101
+ - scraper.rb
102
+ homepage: ''
103
+ licenses:
104
+ - MIT
105
+ metadata: {}
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ! '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ! '>='
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.0.6
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: Command line interface for Morph
126
+ test_files: []