law-japan 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35013c851c57f4a9463eae7dd4edc48414ad4bb5
4
- data.tar.gz: cfa5e29eb1a5e8f36474b8dda4d3e259e324fdbd
3
+ metadata.gz: 5e20792ff38fdcd20a8852d256e7b1d704ba28ef
4
+ data.tar.gz: 58d5cd169c6cbbb82fd1ed9c795895bc469451b7
5
5
  SHA512:
6
- metadata.gz: 0f8f6bae3c58f198213337f344b3a4f8d0c3c215a1f5eb2e1718f1ecd0a8fab120fa4c7d9b1f1c7a08629ec279105111a468bcdea6af4e1a56ac1f91648e4b6b
7
- data.tar.gz: b75ce8cc1fe7f348bc659836f250f96fc288d2fe8474f366ec8de1324468b808e578b16d48730b7fa59c46190bf1b861a2528f07f74913b2e10575c5e41e27dd
6
+ metadata.gz: f5abdf09ecda9d49492ce30c434b81a0a43e3e81237eb42a67732dc6437dd50e836b19323380a42354f767feacd829dca9359a52ca507981ef8120a106f9b7d9
7
+ data.tar.gz: f700e710471e50d18dcea55c20a451a232e04d3816889310337d0c82d10acc460def9cdbbe67bde857ca8539ad56945ce3da30745f92e52a381410e2e17bc753
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.expand_path("../../lib", __FILE__)
4
+
5
+ require "law/japan"
6
+ require "law/japan/cli"
7
+
8
+ Law::Japan::CLI.start(ARGV)
@@ -19,10 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "git"
22
+ spec.add_dependency "thor"
22
23
 
23
24
  spec.add_development_dependency "bundler", "~> 1.5"
24
25
  spec.add_development_dependency "rake"
25
26
  spec.add_development_dependency "pry"
26
- spec.add_development_dependency "mechanize"
27
- spec.add_development_dependency "nokogiri"
28
27
  end
@@ -2,7 +2,7 @@ require "law/japan/version"
2
2
 
3
3
  module Law
4
4
  module Japan
5
- # Your code goes here...
5
+ HomeDir = File.join(Dir.home, ".law-japan")
6
6
  end
7
7
  end
8
8
 
@@ -0,0 +1,17 @@
1
+ require "law/japan"
2
+
3
+ require "thor"
4
+
5
+ class Law::Japan::CLI < Thor
6
+ desc "install", "Install git repo"
7
+ def install
8
+ e_gov = Law::Japan::EGov::CLI.new
9
+ e_gov.install
10
+ end
11
+
12
+ desc "update", "Pull the latest data"
13
+ def update
14
+ e_gov = Law::Japan::EGov::CLI.new
15
+ e_gov.pull
16
+ end
17
+ end
@@ -1,55 +1,22 @@
1
1
  require "law/japan"
2
+ require "law/japan/git"
2
3
 
3
- require "git"
4
4
  require "logger"
5
5
 
6
6
  class Law::Japan::EGov
7
- SourceDir = File.join(Dir.home, ".law-japan")
7
+ include Law::Japan::Git
8
8
 
9
- HtmlRepoURL = "git@github.com:riywo/law-japan-e_gov-html.git"
10
- TextRepoURL = "git@github.com:riywo/law-japan-e_gov-text.git"
9
+ HomeDir = File.join(Law::Japan::HomeDir, "e_gov")
10
+ RepoDir = File.join(HomeDir, "repo")
11
+ RepoURL = "https://github.com/riywo/law-japan-e_gov-text.git"
11
12
 
12
- def initialize
13
- FileUtils.mkdir_p SourceDir
13
+ def initialize(repo_dir: nil, repo_url: nil, logger: nil)
14
+ @repo_dir = repo_dir || RepoDir
15
+ @repo_url = repo_url || RepoURL
16
+ @logger = logger || Logger.new(STDOUT)
14
17
  end
15
18
 
16
- def update!
17
- html_git.pull
18
- text_git.pull
19
- end
20
-
21
- def download!
22
- Downloader.new(html_data_dir).download!
23
- end
24
-
25
- def convert!
26
- Converter.new(html_data_dir, text_data_dir).convert!
27
- end
28
-
29
- private
30
-
31
- def html_data_dir
32
- File.join(html_git.dir.path, "data")
33
- end
34
-
35
- def text_data_dir
36
- File.join(text_git.dir.path, "data")
37
- end
38
-
39
- def git_open_or_clone(repo_url, name)
40
- Git.open(File.join(SourceDir, name), log: Logger.new(STDOUT))
41
- rescue
42
- Git.clone(repo_url, name, path: SourceDir, log: Logger.new(STDOUT))
43
- end
44
-
45
- def html_git
46
- @html_git ||= git_open_or_clone(HtmlRepoURL, "html")
47
- end
48
-
49
- def text_git
50
- @text_git ||= git_open_or_clone(TextRepoURL, "text")
19
+ class CLI < Law::Japan::EGov
20
+ include Law::Japan::Git::CLI
51
21
  end
52
22
  end
53
-
54
- require "law/japan/e_gov/downloader"
55
- require "law/japan/e_gov/converter"
@@ -0,0 +1,54 @@
1
+ require "law/japan"
2
+
3
+ require "git"
4
+
5
+ module Law::Japan::Git
6
+ def data_dir
7
+ @data_dir ||= File.join(git.dir.path, "data")
8
+ end
9
+
10
+ private
11
+
12
+ def git
13
+ unless @git
14
+ begin
15
+ @git ||= ::Git.open(repo_dir, log: logger)
16
+ rescue ArgumentError
17
+ raise "#{repo_dir} has not been ready yet"
18
+ end
19
+
20
+ origin_url = @git.remote("origin").url
21
+ if origin_url != repo_url
22
+ raise "origin url(#{origin_url}) must be #{repo_url}"
23
+ end
24
+ end
25
+ @git
26
+ end
27
+
28
+ def repo_dir
29
+ @repo_dir
30
+ end
31
+
32
+ def repo_url
33
+ @repo_url
34
+ end
35
+
36
+ def logger
37
+ @logger
38
+ end
39
+
40
+ module CLI
41
+ def install
42
+ FileUtils.rm_rf(repo_dir)
43
+
44
+ name = File.basename(repo_dir)
45
+ path = File.dirname(repo_dir)
46
+ FileUtils.mkdir_p(path)
47
+ ::Git.clone(repo_url, name, path: path)
48
+ end
49
+
50
+ def pull
51
+ git.pull
52
+ end
53
+ end
54
+ end
@@ -1,5 +1,5 @@
1
1
  module Law
2
2
  module Japan
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: law-japan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryosuke IWANAGA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-13 00:00:00.000000000 Z
11
+ date: 2014-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: git
@@ -25,27 +25,13 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '1.5'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '1.5'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
28
+ name: thor
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - ">="
46
32
  - !ruby/object:Gem::Version
47
33
  version: '0'
48
- type: :development
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
@@ -53,21 +39,21 @@ dependencies:
53
39
  - !ruby/object:Gem::Version
54
40
  version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
- name: pry
42
+ name: bundler
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - ">="
45
+ - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: '1.5'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - ">="
52
+ - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: '1.5'
69
55
  - !ruby/object:Gem::Dependency
70
- name: mechanize
56
+ name: rake
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - ">="
@@ -81,7 +67,7 @@ dependencies:
81
67
  - !ruby/object:Gem::Version
82
68
  version: '0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: nokogiri
70
+ name: pry
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - ">="
@@ -97,7 +83,8 @@ dependencies:
97
83
  description: This is a library for Japanese laws
98
84
  email:
99
85
  - riywo.jp@gmail.com
100
- executables: []
86
+ executables:
87
+ - law-japan
101
88
  extensions: []
102
89
  extra_rdoc_files: []
103
90
  files:
@@ -107,11 +94,12 @@ files:
107
94
  - LICENSE.txt
108
95
  - README.md
109
96
  - Rakefile
97
+ - bin/law-japan
110
98
  - law-japan.gemspec
111
99
  - lib/law/japan.rb
100
+ - lib/law/japan/cli.rb
112
101
  - lib/law/japan/e_gov.rb
113
- - lib/law/japan/e_gov/converter.rb
114
- - lib/law/japan/e_gov/downloader.rb
102
+ - lib/law/japan/git.rb
115
103
  - lib/law/japan/version.rb
116
104
  homepage: https://github.com/riywo/law-japan
117
105
  licenses:
@@ -1,45 +0,0 @@
1
- require "law/japan/e_gov"
2
- require "find"
3
- require "nokogiri"
4
- require "logger"
5
-
6
- class Law::Japan::EGov::Converter
7
- attr_reader :html_dir, :text_dir
8
-
9
- def initialize(html_dir, text_dir)
10
- @html_dir = html_dir
11
- @text_dir = text_dir
12
- end
13
-
14
- def convert!
15
- logger.info "Start converting all laws"
16
- convert
17
- logger.info "Finish converting all laws"
18
- end
19
-
20
- private
21
-
22
- def logger
23
- @logger ||= Logger.new STDOUT
24
- end
25
-
26
- def convert
27
- Dir.chdir(html_dir) do
28
- Dir.glob(File.join("**", "*.html")) do |path|
29
- convert_html(path)
30
- end
31
- end
32
- end
33
-
34
- def convert_html(path)
35
- dirname = File.dirname(path)
36
- basename = File.basename(path, ".html")
37
- target_dir = File.join(text_dir, dirname)
38
- target_file = File.join(target_dir, "#{basename}.txt")
39
- logger.info "Converting to #{target_file}"
40
-
41
- FileUtils.mkdir_p target_dir
42
- text = Nokogiri::HTML(open(path)).css("body").first.text
43
- File.write(target_file, text)
44
- end
45
- end
@@ -1,75 +0,0 @@
1
- require "law/japan/e_gov"
2
- require "mechanize"
3
-
4
- class Law::Japan::EGov::Downloader
5
- module Mechanize::Form::Clearable
6
- refine Mechanize::Form do
7
- def clear_buttons
8
- @clicked_buttons = []
9
- end
10
- end
11
- end
12
- using Mechanize::Form::Clearable
13
-
14
- attr_reader :root_dir
15
- def initialize(root_dir)
16
- @root_dir = root_dir
17
- end
18
-
19
- def download!
20
- logger.info "Start downloading all laws"
21
- download
22
- logger.info "Finish downloading all laws"
23
- end
24
-
25
- private
26
-
27
- def logger
28
- @logger ||= Logger.new STDOUT
29
- end
30
-
31
- def agent
32
- unless @agent
33
- @agent = Mechanize.new { |a| a.user_agent_alias = "Windows IE 9" }
34
- @agent.log = Logger.new STDOUT
35
- end
36
- @agent
37
- end
38
-
39
- def index_page
40
- @index_page ||= agent.get("http://law.e-gov.go.jp/cgi-bin/idxsearch.cgi")
41
- end
42
-
43
- def category_form
44
- index_page.forms_with(name: "index")[2]
45
- end
46
-
47
- def download
48
- category_form.buttons.each do |button|
49
- category_name = button.node.next.text.gsub(/[  ]+/, "")
50
-
51
- category_form.clear_buttons
52
- list_page = agent.submit(category_form, button)
53
- sleep 1
54
-
55
- list_page.links.each do |link|
56
- law_name = link.text
57
- h_file_name = CGI.parse(link.uri.query)["H_FILE_NAME"].first
58
- if h_file_name =~ /^([MTSH]\d{2})/
59
- law_url = "http://law.e-gov.go.jp/htmldata/#{$1}/#{h_file_name}.html"
60
- law_file = File.join(root_dir, category_name, "#{h_file_name}.html")
61
- if File.exists? law_file
62
- logger.info "File already exists for #{law_name} (#{law_file})"
63
- else
64
- logger.info "Start downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
65
- agent.download(law_url, law_file)
66
- logger.info "Finish downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
67
- sleep 2
68
- end
69
- else
70
- logger.warn "Invalid H_FILE_NAME #{h_file_name} for #{law_file}"
71
- end
72
- end
73
- end
74
- end
75
- end