law-japan 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/law-japan +8 -0
- data/law-japan.gemspec +1 -2
- data/lib/law/japan.rb +1 -1
- data/lib/law/japan/cli.rb +17 -0
- data/lib/law/japan/e_gov.rb +11 -44
- data/lib/law/japan/git.rb +54 -0
- data/lib/law/japan/version.rb +1 -1
- metadata +16 -28
- data/lib/law/japan/e_gov/converter.rb +0 -45
- data/lib/law/japan/e_gov/downloader.rb +0 -75
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e20792ff38fdcd20a8852d256e7b1d704ba28ef
|
4
|
+
data.tar.gz: 58d5cd169c6cbbb82fd1ed9c795895bc469451b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5abdf09ecda9d49492ce30c434b81a0a43e3e81237eb42a67732dc6437dd50e836b19323380a42354f767feacd829dca9359a52ca507981ef8120a106f9b7d9
|
7
|
+
data.tar.gz: f700e710471e50d18dcea55c20a451a232e04d3816889310337d0c82d10acc460def9cdbbe67bde857ca8539ad56945ce3da30745f92e52a381410e2e17bc753
|
data/bin/law-japan
ADDED
data/law-japan.gemspec
CHANGED
@@ -19,10 +19,9 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_dependency "git"
|
22
|
+
spec.add_dependency "thor"
|
22
23
|
|
23
24
|
spec.add_development_dependency "bundler", "~> 1.5"
|
24
25
|
spec.add_development_dependency "rake"
|
25
26
|
spec.add_development_dependency "pry"
|
26
|
-
spec.add_development_dependency "mechanize"
|
27
|
-
spec.add_development_dependency "nokogiri"
|
28
27
|
end
|
data/lib/law/japan.rb
CHANGED
data/lib/law/japan/cli.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require "law/japan"
|
2
|
+
|
3
|
+
require "thor"
|
4
|
+
|
5
|
+
class Law::Japan::CLI < Thor
|
6
|
+
desc "install", "Install git repo"
|
7
|
+
def install
|
8
|
+
e_gov = Law::Japan::EGov::CLI.new
|
9
|
+
e_gov.install
|
10
|
+
end
|
11
|
+
|
12
|
+
desc "update", "Pull the latest data"
|
13
|
+
def update
|
14
|
+
e_gov = Law::Japan::EGov::CLI.new
|
15
|
+
e_gov.pull
|
16
|
+
end
|
17
|
+
end
|
data/lib/law/japan/e_gov.rb
CHANGED
@@ -1,55 +1,22 @@
|
|
1
1
|
require "law/japan"
|
2
|
+
require "law/japan/git"
|
2
3
|
|
3
|
-
require "git"
|
4
4
|
require "logger"
|
5
5
|
|
6
6
|
class Law::Japan::EGov
|
7
|
-
|
7
|
+
include Law::Japan::Git
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
HomeDir = File.join(Law::Japan::HomeDir, "e_gov")
|
10
|
+
RepoDir = File.join(HomeDir, "repo")
|
11
|
+
RepoURL = "https://github.com/riywo/law-japan-e_gov-text.git"
|
11
12
|
|
12
|
-
def initialize
|
13
|
-
|
13
|
+
def initialize(repo_dir: nil, repo_url: nil, logger: nil)
|
14
|
+
@repo_dir = repo_dir || RepoDir
|
15
|
+
@repo_url = repo_url || RepoURL
|
16
|
+
@logger = logger || Logger.new(STDOUT)
|
14
17
|
end
|
15
18
|
|
16
|
-
|
17
|
-
|
18
|
-
text_git.pull
|
19
|
-
end
|
20
|
-
|
21
|
-
def download!
|
22
|
-
Downloader.new(html_data_dir).download!
|
23
|
-
end
|
24
|
-
|
25
|
-
def convert!
|
26
|
-
Converter.new(html_data_dir, text_data_dir).convert!
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
def html_data_dir
|
32
|
-
File.join(html_git.dir.path, "data")
|
33
|
-
end
|
34
|
-
|
35
|
-
def text_data_dir
|
36
|
-
File.join(text_git.dir.path, "data")
|
37
|
-
end
|
38
|
-
|
39
|
-
def git_open_or_clone(repo_url, name)
|
40
|
-
Git.open(File.join(SourceDir, name), log: Logger.new(STDOUT))
|
41
|
-
rescue
|
42
|
-
Git.clone(repo_url, name, path: SourceDir, log: Logger.new(STDOUT))
|
43
|
-
end
|
44
|
-
|
45
|
-
def html_git
|
46
|
-
@html_git ||= git_open_or_clone(HtmlRepoURL, "html")
|
47
|
-
end
|
48
|
-
|
49
|
-
def text_git
|
50
|
-
@text_git ||= git_open_or_clone(TextRepoURL, "text")
|
19
|
+
class CLI < Law::Japan::EGov
|
20
|
+
include Law::Japan::Git::CLI
|
51
21
|
end
|
52
22
|
end
|
53
|
-
|
54
|
-
require "law/japan/e_gov/downloader"
|
55
|
-
require "law/japan/e_gov/converter"
|
data/lib/law/japan/git.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require "law/japan"
|
2
|
+
|
3
|
+
require "git"
|
4
|
+
|
5
|
+
module Law::Japan::Git
|
6
|
+
def data_dir
|
7
|
+
@data_dir ||= File.join(git.dir.path, "data")
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def git
|
13
|
+
unless @git
|
14
|
+
begin
|
15
|
+
@git ||= ::Git.open(repo_dir, log: logger)
|
16
|
+
rescue ArgumentError
|
17
|
+
raise "#{repo_dir} has not been ready yet"
|
18
|
+
end
|
19
|
+
|
20
|
+
origin_url = @git.remote("origin").url
|
21
|
+
if origin_url != repo_url
|
22
|
+
raise "origin url(#{origin_url}) must be #{repo_url}"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
@git
|
26
|
+
end
|
27
|
+
|
28
|
+
def repo_dir
|
29
|
+
@repo_dir
|
30
|
+
end
|
31
|
+
|
32
|
+
def repo_url
|
33
|
+
@repo_url
|
34
|
+
end
|
35
|
+
|
36
|
+
def logger
|
37
|
+
@logger
|
38
|
+
end
|
39
|
+
|
40
|
+
module CLI
|
41
|
+
def install
|
42
|
+
FileUtils.rm_rf(repo_dir)
|
43
|
+
|
44
|
+
name = File.basename(repo_dir)
|
45
|
+
path = File.dirname(repo_dir)
|
46
|
+
FileUtils.mkdir_p(path)
|
47
|
+
::Git.clone(repo_url, name, path: path)
|
48
|
+
end
|
49
|
+
|
50
|
+
def pull
|
51
|
+
git.pull
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/law/japan/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: law-japan
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryosuke IWANAGA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: git
|
@@ -25,27 +25,13 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '1.5'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '1.5'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
28
|
+
name: thor
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
44
30
|
requirements:
|
45
31
|
- - ">="
|
46
32
|
- !ruby/object:Gem::Version
|
47
33
|
version: '0'
|
48
|
-
type: :development
|
34
|
+
type: :runtime
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
@@ -53,21 +39,21 @@ dependencies:
|
|
53
39
|
- !ruby/object:Gem::Version
|
54
40
|
version: '0'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: pry
|
42
|
+
name: bundler
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
|
-
- - ">="
|
45
|
+
- - "~>"
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
47
|
+
version: '1.5'
|
62
48
|
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- - ">="
|
52
|
+
- - "~>"
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
54
|
+
version: '1.5'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: mechanize
|
56
|
+
name: rake
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - ">="
|
@@ -81,7 +67,7 @@ dependencies:
|
|
81
67
|
- !ruby/object:Gem::Version
|
82
68
|
version: '0'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
|
-
name: nokogiri
|
70
|
+
name: pry
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
73
|
- - ">="
|
@@ -97,7 +83,8 @@ dependencies:
|
|
97
83
|
description: This is a library for Japanese laws
|
98
84
|
email:
|
99
85
|
- riywo.jp@gmail.com
|
100
|
-
executables: []
|
86
|
+
executables:
|
87
|
+
- law-japan
|
101
88
|
extensions: []
|
102
89
|
extra_rdoc_files: []
|
103
90
|
files:
|
@@ -107,11 +94,12 @@ files:
|
|
107
94
|
- LICENSE.txt
|
108
95
|
- README.md
|
109
96
|
- Rakefile
|
97
|
+
- bin/law-japan
|
110
98
|
- law-japan.gemspec
|
111
99
|
- lib/law/japan.rb
|
100
|
+
- lib/law/japan/cli.rb
|
112
101
|
- lib/law/japan/e_gov.rb
|
113
|
-
- lib/law/japan/e_gov/converter.rb
|
114
|
-
- lib/law/japan/e_gov/downloader.rb
|
102
|
+
- lib/law/japan/git.rb
|
115
103
|
- lib/law/japan/version.rb
|
116
104
|
homepage: https://github.com/riywo/law-japan
|
117
105
|
licenses:
|
data/lib/law/japan/e_gov/converter.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
require "law/japan/e_gov"
|
2
|
-
require "find"
|
3
|
-
require "nokogiri"
|
4
|
-
require "logger"
|
5
|
-
|
6
|
-
class Law::Japan::EGov::Converter
|
7
|
-
attr_reader :html_dir, :text_dir
|
8
|
-
|
9
|
-
def initialize(html_dir, text_dir)
|
10
|
-
@html_dir = html_dir
|
11
|
-
@text_dir = text_dir
|
12
|
-
end
|
13
|
-
|
14
|
-
def convert!
|
15
|
-
logger.info "Start converting all laws"
|
16
|
-
convert
|
17
|
-
logger.info "Finish converting all laws"
|
18
|
-
end
|
19
|
-
|
20
|
-
private
|
21
|
-
|
22
|
-
def logger
|
23
|
-
@logger ||= Logger.new STDOUT
|
24
|
-
end
|
25
|
-
|
26
|
-
def convert
|
27
|
-
Dir.chdir(html_dir) do
|
28
|
-
Dir.glob(File.join("**", "*.html")) do |path|
|
29
|
-
convert_html(path)
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def convert_html(path)
|
35
|
-
dirname = File.dirname(path)
|
36
|
-
basename = File.basename(path, ".html")
|
37
|
-
target_dir = File.join(text_dir, dirname)
|
38
|
-
target_file = File.join(target_dir, "#{basename}.txt")
|
39
|
-
logger.info "Converting to #{target_file}"
|
40
|
-
|
41
|
-
FileUtils.mkdir_p target_dir
|
42
|
-
text = Nokogiri::HTML(open(path)).css("body").first.text
|
43
|
-
File.write(target_file, text)
|
44
|
-
end
|
45
|
-
end
|
data/lib/law/japan/e_gov/downloader.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
require "law/japan/e_gov"
|
2
|
-
require "mechanize"
|
3
|
-
|
4
|
-
class Law::Japan::EGov::Downloader
|
5
|
-
module Mechanize::Form::Clearable
|
6
|
-
refine Mechanize::Form do
|
7
|
-
def clear_buttons
|
8
|
-
@clicked_buttons = []
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
12
|
-
using Mechanize::Form::Clearable
|
13
|
-
|
14
|
-
attr_reader :root_dir
|
15
|
-
def initialize(root_dir)
|
16
|
-
@root_dir = root_dir
|
17
|
-
end
|
18
|
-
|
19
|
-
def download!
|
20
|
-
logger.info "Start downloading all laws"
|
21
|
-
download
|
22
|
-
logger.info "Finish downloading all laws"
|
23
|
-
end
|
24
|
-
|
25
|
-
private
|
26
|
-
|
27
|
-
def logger
|
28
|
-
@logger ||= Logger.new STDOUT
|
29
|
-
end
|
30
|
-
|
31
|
-
def agent
|
32
|
-
unless @agent
|
33
|
-
@agent = Mechanize.new { |a| a.user_agent_alias = "Windows IE 9" }
|
34
|
-
@agent.log = Logger.new STDOUT
|
35
|
-
end
|
36
|
-
@agent
|
37
|
-
end
|
38
|
-
|
39
|
-
def index_page
|
40
|
-
@index_page ||= agent.get("http://law.e-gov.go.jp/cgi-bin/idxsearch.cgi")
|
41
|
-
end
|
42
|
-
|
43
|
-
def category_form
|
44
|
-
index_page.forms_with(name: "index")[2]
|
45
|
-
end
|
46
|
-
|
47
|
-
def download
|
48
|
-
category_form.buttons.each do |button|
|
49
|
-
category_name = button.node.next.text.gsub(/[ ]+/, "")
|
50
|
-
|
51
|
-
category_form.clear_buttons
|
52
|
-
list_page = agent.submit(category_form, button)
|
53
|
-
sleep 1
|
54
|
-
|
55
|
-
list_page.links.each do |link|
|
56
|
-
law_name = link.text
|
57
|
-
h_file_name = CGI.parse(link.uri.query)["H_FILE_NAME"].first
|
58
|
-
if h_file_name =~ /^([MTSH]\d{2})/
|
59
|
-
law_url = "http://law.e-gov.go.jp/htmldata/#{$1}/#{h_file_name}.html"
|
60
|
-
law_file = File.join(root_dir, category_name, "#{h_file_name}.html")
|
61
|
-
if File.exists? law_file
|
62
|
-
logger.info "File already exists for #{law_name} (#{law_file})"
|
63
|
-
else
|
64
|
-
logger.info "Start downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
|
65
|
-
agent.download(law_url, law_file)
|
66
|
-
logger.info "Finish downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
|
67
|
-
sleep 2
|
68
|
-
end
|
69
|
-
else
|
70
|
-
logger.warn "Invalid H_FILE_NAME #{h_file_name} for #{law_file}"
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|