law-japan 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 35013c851c57f4a9463eae7dd4edc48414ad4bb5
4
+ data.tar.gz: cfa5e29eb1a5e8f36474b8dda4d3e259e324fdbd
5
+ SHA512:
6
+ metadata.gz: 0f8f6bae3c58f198213337f344b3a4f8d0c3c215a1f5eb2e1718f1ecd0a8fab120fa4c7d9b1f1c7a08629ec279105111a468bcdea6af4e1a56ac1f91648e4b6b
7
+ data.tar.gz: b75ce8cc1fe7f348bc659836f250f96fc288d2fe8474f366ec8de1324468b808e578b16d48730b7fa59c46190bf1b861a2528f07f74913b2e10575c5e41e27dd
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .*.swp
data/.pryrc ADDED
@@ -0,0 +1 @@
1
+ require "law/japan"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in law-japan.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Ryosuke IWANAGA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Law::Japan
2
+
3
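+ A Ruby library for working with Japanese laws. It downloads the HTML of the laws listed on the e-Gov law index and converts each file to plain text.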
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'law-japan'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install law-japan
18
+
19
+ ## Usage
20
+
21
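+ After `require "law/japan"`, create a `Law::Japan::EGov` instance and call `update!` (pull the HTML and text data repositories), `download!` (fetch law HTML from e-Gov) or `convert!` (convert the HTML files to text).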
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( https://github.com/riywo/law-japan/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/law-japan.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'law/japan/version'
5
+
6
# Gem specification for law-japan: package metadata, the list of files to
# ship (everything tracked by git) and the gem's dependencies.
Gem::Specification.new do |spec|
  spec.name          = "law-japan"
  spec.version       = Law::Japan::VERSION
  spec.authors       = ["Ryosuke IWANAGA"]
  spec.email         = ["riywo.jp@gmail.com"]
  spec.summary       = %q{Operating Japanese laws}
  spec.description   = %q{This is a library for Japanese laws}
  spec.homepage      = "https://github.com/riywo/law-japan"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies. mechanize and nokogiri are required by
  # lib/law/japan/e_gov/downloader.rb and lib/law/japan/e_gov/converter.rb,
  # so they must be installed together with the gem, not only in development.
  spec.add_dependency "git"
  spec.add_dependency "mechanize"
  spec.add_dependency "nokogiri"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "pry"
end
@@ -0,0 +1,45 @@
1
+ require "law/japan/e_gov"
2
+ require "find"
3
+ require "nokogiri"
4
+ require "logger"
5
+
6
# Converts downloaded law HTML files into plain-text files.
#
# Every "*.html" file found (recursively) under +html_dir+ is parsed with
# Nokogiri and the text of its <body> element is written to a ".txt" file at
# the same relative path under +text_dir+.
class Law::Japan::EGov::Converter
  attr_reader :html_dir, :text_dir

  # @param html_dir [String] directory that is searched for "*.html" files
  # @param text_dir [String] directory that receives the generated ".txt" files
  def initialize(html_dir, text_dir)
    @html_dir = html_dir
    @text_dir = text_dir
  end

  # Converts every HTML file under +html_dir+, logging the start and the end
  # of the run.
  def convert!
    logger.info "Start converting all laws"
    convert
    logger.info "Finish converting all laws"
  end

  private

  # Lazily created logger that writes to standard output.
  def logger
    @logger ||= Logger.new STDOUT
  end

  # Walks +html_dir+ and converts each HTML file found.
  def convert
    # Resolve the output root before changing directory; otherwise a relative
    # text_dir would be interpreted relative to html_dir.
    text_root = File.expand_path(text_dir)

    Dir.chdir(html_dir) do
      Dir.glob(File.join("**", "*.html")) do |path|
        convert_html(path, text_root)
      end
    end
  end

  # Writes the body text of one HTML file to the matching ".txt" file.
  #
  # @param path [String] HTML file path, relative to the current directory
  # @param text_root [String] directory under which the text file is created
  def convert_html(path, text_root = text_dir)
    dirname = File.dirname(path)
    basename = File.basename(path, ".html")
    target_dir = File.join(text_root, dirname)
    target_file = File.join(target_dir, "#{basename}.txt")
    logger.info "Converting to #{target_file}"

    FileUtils.mkdir_p target_dir

    # Block-form File.open closes the handle as soon as parsing is done.
    # Kernel#open left the handle open and would run a path that starts with
    # "|" as a command. An IO (not a String) is still handed to Nokogiri, as
    # before. NOTE(review): presumably this matters for detecting the page's
    # declared encoding; confirm before switching to File.read.
    document = File.open(path) { |io| Nokogiri::HTML(io) }

    body = document.at_css("body")
    unless body
      # A document without <body> (e.g. an empty download) must not abort the
      # whole conversion run.
      logger.warn "No <body> element in #{path}; skipping"
      return
    end

    File.write(target_file, body.text)
  end
end
@@ -0,0 +1,75 @@
1
+ require "law/japan/e_gov"
2
+ require "mechanize"
3
+
4
# Downloads law HTML files from the e-Gov law index into +root_dir+, using
# one sub-directory per category button found on the index page.
class Law::Japan::EGov::Downloader
  # Refinement that lets a Mechanize::Form forget which buttons were clicked,
  # so the same form object can be submitted again with a different button.
  module Mechanize::Form::Clearable
    refine Mechanize::Form do
      def clear_buttons
        @clicked_buttons = []
      end
    end
  end
  using Mechanize::Form::Clearable

  attr_reader :root_dir

  # @param root_dir [String] directory under which the HTML files are stored
  def initialize(root_dir)
    @root_dir = root_dir
  end

  # Downloads every law that is not on disk yet, logging the start and the
  # end of the run.
  def download!
    logger.info "Start downloading all laws"
    download
    logger.info "Finish downloading all laws"
  end

  private

  # Lazily created logger that writes to standard output.
  def logger
    @logger ||= Logger.new STDOUT
  end

  # Lazily created Mechanize agent that logs its traffic to standard output.
  def agent
    unless @agent
      @agent = Mechanize.new { |a| a.user_agent_alias = "Windows IE 9" }
      @agent.log = Logger.new STDOUT
    end
    @agent
  end

  # The index page, fetched once and then reused.
  def index_page
    @index_page ||= agent.get("http://law.e-gov.go.jp/cgi-bin/idxsearch.cgi")
  end

  # The third form named "index" on the index page; its buttons are the
  # categories iterated by #download.
  def category_form
    index_page.forms_with(name: "index")[2]
  end

  # Submits the category form once per button and downloads every law linked
  # from the resulting list page.
  def download
    category_form.buttons.each do |button|
      # The category label is the text node following the button; ASCII and
      # full-width (U+3000) spaces are removed so it can serve as a directory
      # name. The full-width space is written as an escape to keep it visible.
      category_name = button.node.next.text.gsub(/[ \u3000]+/, "")

      category_form.clear_buttons
      list_page = agent.submit(category_form, button)
      sleep 1 # pause between requests to the server

      list_page.links.each { |link| download_law(link, category_name) }
    end
  end

  # Downloads the law a list-page link points to, unless its file already
  # exists. Links that carry no usable H_FILE_NAME parameter are logged and
  # skipped.
  #
  # @param link [Mechanize::Page::Link] link taken from a category list page
  # @param category_name [String] sub-directory of root_dir for this category
  def download_law(link, category_name)
    law_name = link.text
    # A link may have no URI or no query string; treat that like a missing
    # H_FILE_NAME instead of letting CGI.parse(nil) raise.
    query = link.uri && link.uri.query
    h_file_name = query && CGI.parse(query)["H_FILE_NAME"].first

    # The leading letter-plus-two-digits prefix is also the directory name on
    # the server. NOTE(review): presumably an era initial and year; confirm.
    prefix = h_file_name.to_s[/^([MTSH]\d{2})/, 1]
    unless prefix
      logger.warn "Invalid H_FILE_NAME #{h_file_name} for #{law_name}"
      return
    end

    law_url = "http://law.e-gov.go.jp/htmldata/#{prefix}/#{h_file_name}.html"
    law_file = File.join(root_dir, category_name, "#{h_file_name}.html")

    if File.exist? law_file
      logger.info "File already exists for #{law_name} (#{law_file})"
    else
      logger.info "Start downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
      agent.download(law_url, law_file)
      logger.info "Finish downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
      sleep 2 # pause between downloads
    end
  end
end
@@ -0,0 +1,55 @@
1
require "law/japan"

require "fileutils"
require "git"
require "logger"
5
+
6
# Entry point for the e-Gov law data.
#
# Two git repositories, one holding law HTML and one holding the converted
# text, are kept under SourceDir. Each is opened on first use, or cloned
# when no checkout exists there yet.
class Law::Japan::EGov
  # Local directory that holds both repository checkouts.
  SourceDir = File.join(Dir.home, ".law-japan")

  HtmlRepoURL = "git@github.com:riywo/law-japan-e_gov-html.git"
  TextRepoURL = "git@github.com:riywo/law-japan-e_gov-text.git"

  # Ensures SourceDir exists.
  def initialize
    FileUtils.mkdir_p SourceDir
  end

  # Pulls the HTML repository and then the text repository.
  def update!
    html_git.pull
    text_git.pull
  end

  # Downloads law HTML into the "data" directory of the HTML repository.
  def download!
    Downloader.new(html_data_dir).download!
  end

  # Converts the HTML in the HTML repository's "data" directory into text
  # files in the text repository's "data" directory.
  def convert!
    Converter.new(html_data_dir, text_data_dir).convert!
  end

  private

  def html_data_dir
    File.join(html_git.dir.path, "data")
  end

  def text_data_dir
    File.join(text_git.dir.path, "data")
  end

  # Opens the checkout named +name+ under SourceDir, or clones +repo_url+
  # there when no checkout exists yet.
  #
  # The decision is an explicit check rather than a bare `rescue` around
  # Git.open, so unrelated errors are no longer swallowed and turned into a
  # clone attempt.
  #
  # @param repo_url [String] URL of the repository to clone
  # @param name [String] directory name of the checkout inside SourceDir
  def git_open_or_clone(repo_url, name)
    repo_dir = File.join(SourceDir, name)
    git_log = Logger.new(STDOUT)

    if File.exist?(File.join(repo_dir, ".git"))
      Git.open(repo_dir, log: git_log)
    else
      Git.clone(repo_url, name, path: SourceDir, log: git_log)
    end
  end

  # Handle for the HTML repository, opened or cloned on first use.
  def html_git
    @html_git ||= git_open_or_clone(HtmlRepoURL, "html")
  end

  # Handle for the text repository, opened or cloned on first use.
  def text_git
    @text_git ||= git_open_or_clone(TextRepoURL, "text")
  end
end
53
+
54
+ require "law/japan/e_gov/downloader"
55
+ require "law/japan/e_gov/converter"
@@ -0,0 +1,5 @@
1
module Law
  module Japan
    # Version of the law-japan gem; law-japan.gemspec reads this constant.
    # Frozen so the shared string cannot be mutated in place.
    VERSION = "0.0.1".freeze
  end
end
data/lib/law/japan.rb ADDED
@@ -0,0 +1,9 @@
1
+ require "law/japan/version"
2
+
3
# Top-level namespace of the law-japan gem.
module Law
  # Namespace for everything related to Japanese laws. It defines nothing
  # itself; this file requires "law/japan/e_gov" after the namespace is set up.
  module Japan
  end
end
8
+
9
+ require "law/japan/e_gov"
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: law-japan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ryosuke IWANAGA
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: git
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.5'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.5'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mechanize
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: This is a library for Japanese laws
98
+ email:
99
+ - riywo.jp@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".pryrc"
106
+ - Gemfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - law-japan.gemspec
111
+ - lib/law/japan.rb
112
+ - lib/law/japan/e_gov.rb
113
+ - lib/law/japan/e_gov/converter.rb
114
+ - lib/law/japan/e_gov/downloader.rb
115
+ - lib/law/japan/version.rb
116
+ homepage: https://github.com/riywo/law-japan
117
+ licenses:
118
+ - MIT
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.2.0
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: Operating Japanese laws
140
+ test_files: []