law-japan 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35013c851c57f4a9463eae7dd4edc48414ad4bb5
4
- data.tar.gz: cfa5e29eb1a5e8f36474b8dda4d3e259e324fdbd
3
+ metadata.gz: 5e20792ff38fdcd20a8852d256e7b1d704ba28ef
4
+ data.tar.gz: 58d5cd169c6cbbb82fd1ed9c795895bc469451b7
5
5
  SHA512:
6
- metadata.gz: 0f8f6bae3c58f198213337f344b3a4f8d0c3c215a1f5eb2e1718f1ecd0a8fab120fa4c7d9b1f1c7a08629ec279105111a468bcdea6af4e1a56ac1f91648e4b6b
7
- data.tar.gz: b75ce8cc1fe7f348bc659836f250f96fc288d2fe8474f366ec8de1324468b808e578b16d48730b7fa59c46190bf1b861a2528f07f74913b2e10575c5e41e27dd
6
+ metadata.gz: f5abdf09ecda9d49492ce30c434b81a0a43e3e81237eb42a67732dc6437dd50e836b19323380a42354f767feacd829dca9359a52ca507981ef8120a106f9b7d9
7
+ data.tar.gz: f700e710471e50d18dcea55c20a451a232e04d3816889310337d0c82d10acc460def9cdbbe67bde857ca8539ad56945ce3da30745f92e52a381410e2e17bc753
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.unshift File.expand_path("../../lib", __FILE__)
4
+
5
+ require "law/japan"
6
+ require "law/japan/cli"
7
+
8
+ Law::Japan::CLI.start(ARGV)
@@ -19,10 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "git"
22
+ spec.add_dependency "thor"
22
23
 
23
24
  spec.add_development_dependency "bundler", "~> 1.5"
24
25
  spec.add_development_dependency "rake"
25
26
  spec.add_development_dependency "pry"
26
- spec.add_development_dependency "mechanize"
27
- spec.add_development_dependency "nokogiri"
28
27
  end
@@ -2,7 +2,7 @@ require "law/japan/version"
2
2
 
3
3
  module Law
4
4
  module Japan
5
- # Your code goes here...
5
+ HomeDir = File.join(Dir.home, ".law-japan")
6
6
  end
7
7
  end
8
8
 
@@ -0,0 +1,17 @@
1
+ require "law/japan"
2
+
3
+ require "thor"
4
+
5
+ class Law::Japan::CLI < Thor
6
+ desc "install", "Install git repo"
7
+ def install
8
+ e_gov = Law::Japan::EGov::CLI.new
9
+ e_gov.install
10
+ end
11
+
12
+ desc "update", "Pull the latest data"
13
+ def update
14
+ e_gov = Law::Japan::EGov::CLI.new
15
+ e_gov.pull
16
+ end
17
+ end
@@ -1,55 +1,22 @@
1
1
  require "law/japan"
2
+ require "law/japan/git"
2
3
 
3
- require "git"
4
4
  require "logger"
5
5
 
6
6
  class Law::Japan::EGov
7
- SourceDir = File.join(Dir.home, ".law-japan")
7
+ include Law::Japan::Git
8
8
 
9
- HtmlRepoURL = "git@github.com:riywo/law-japan-e_gov-html.git"
10
- TextRepoURL = "git@github.com:riywo/law-japan-e_gov-text.git"
9
+ HomeDir = File.join(Law::Japan::HomeDir, "e_gov")
10
+ RepoDir = File.join(HomeDir, "repo")
11
+ RepoURL = "https://github.com/riywo/law-japan-e_gov-text.git"
11
12
 
12
- def initialize
13
- FileUtils.mkdir_p SourceDir
13
+ def initialize(repo_dir: nil, repo_url: nil, logger: nil)
14
+ @repo_dir = repo_dir || RepoDir
15
+ @repo_url = repo_url || RepoURL
16
+ @logger = logger || Logger.new(STDOUT)
14
17
  end
15
18
 
16
- def update!
17
- html_git.pull
18
- text_git.pull
19
- end
20
-
21
- def download!
22
- Downloader.new(html_data_dir).download!
23
- end
24
-
25
- def convert!
26
- Converter.new(html_data_dir, text_data_dir).convert!
27
- end
28
-
29
- private
30
-
31
- def html_data_dir
32
- File.join(html_git.dir.path, "data")
33
- end
34
-
35
- def text_data_dir
36
- File.join(text_git.dir.path, "data")
37
- end
38
-
39
- def git_open_or_clone(repo_url, name)
40
- Git.open(File.join(SourceDir, name), log: Logger.new(STDOUT))
41
- rescue
42
- Git.clone(repo_url, name, path: SourceDir, log: Logger.new(STDOUT))
43
- end
44
-
45
- def html_git
46
- @html_git ||= git_open_or_clone(HtmlRepoURL, "html")
47
- end
48
-
49
- def text_git
50
- @text_git ||= git_open_or_clone(TextRepoURL, "text")
19
+ class CLI < Law::Japan::EGov
20
+ include Law::Japan::Git::CLI
51
21
  end
52
22
  end
53
-
54
- require "law/japan/e_gov/downloader"
55
- require "law/japan/e_gov/converter"
@@ -0,0 +1,54 @@
1
+ require "law/japan"
2
+
3
+ require "git"
4
+
5
+ module Law::Japan::Git
6
+ def data_dir
7
+ @data_dir ||= File.join(git.dir.path, "data")
8
+ end
9
+
10
+ private
11
+
12
+ def git
13
+ unless @git
14
+ begin
15
+ @git ||= ::Git.open(repo_dir, log: logger)
16
+ rescue ArgumentError
17
+ raise "#{repo_dir} has not been ready yet"
18
+ end
19
+
20
+ origin_url = @git.remote("origin").url
21
+ if origin_url != repo_url
22
+ raise "origin url(#{origin_url}) must be #{repo_url}"
23
+ end
24
+ end
25
+ @git
26
+ end
27
+
28
+ def repo_dir
29
+ @repo_dir
30
+ end
31
+
32
+ def repo_url
33
+ @repo_url
34
+ end
35
+
36
+ def logger
37
+ @logger
38
+ end
39
+
40
+ module CLI
41
+ def install
42
+ FileUtils.rm_rf(repo_dir)
43
+
44
+ name = File.basename(repo_dir)
45
+ path = File.dirname(repo_dir)
46
+ FileUtils.mkdir_p(path)
47
+ ::Git.clone(repo_url, name, path: path)
48
+ end
49
+
50
+ def pull
51
+ git.pull
52
+ end
53
+ end
54
+ end
@@ -1,5 +1,5 @@
1
1
  module Law
2
2
  module Japan
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: law-japan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryosuke IWANAGA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-13 00:00:00.000000000 Z
11
+ date: 2014-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: git
@@ -25,27 +25,13 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '1.5'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '1.5'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
28
+ name: thor
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - ">="
46
32
  - !ruby/object:Gem::Version
47
33
  version: '0'
48
- type: :development
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
@@ -53,21 +39,21 @@ dependencies:
53
39
  - !ruby/object:Gem::Version
54
40
  version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
- name: pry
42
+ name: bundler
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - ">="
45
+ - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: '1.5'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - ">="
52
+ - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: '1.5'
69
55
  - !ruby/object:Gem::Dependency
70
- name: mechanize
56
+ name: rake
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - ">="
@@ -81,7 +67,7 @@ dependencies:
81
67
  - !ruby/object:Gem::Version
82
68
  version: '0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: nokogiri
70
+ name: pry
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - ">="
@@ -97,7 +83,8 @@ dependencies:
97
83
  description: This is a library for Japanese laws
98
84
  email:
99
85
  - riywo.jp@gmail.com
100
- executables: []
86
+ executables:
87
+ - law-japan
101
88
  extensions: []
102
89
  extra_rdoc_files: []
103
90
  files:
@@ -107,11 +94,12 @@ files:
107
94
  - LICENSE.txt
108
95
  - README.md
109
96
  - Rakefile
97
+ - bin/law-japan
110
98
  - law-japan.gemspec
111
99
  - lib/law/japan.rb
100
+ - lib/law/japan/cli.rb
112
101
  - lib/law/japan/e_gov.rb
113
- - lib/law/japan/e_gov/converter.rb
114
- - lib/law/japan/e_gov/downloader.rb
102
+ - lib/law/japan/git.rb
115
103
  - lib/law/japan/version.rb
116
104
  homepage: https://github.com/riywo/law-japan
117
105
  licenses:
@@ -1,45 +0,0 @@
1
- require "law/japan/e_gov"
2
- require "find"
3
- require "nokogiri"
4
- require "logger"
5
-
6
- class Law::Japan::EGov::Converter
7
- attr_reader :html_dir, :text_dir
8
-
9
- def initialize(html_dir, text_dir)
10
- @html_dir = html_dir
11
- @text_dir = text_dir
12
- end
13
-
14
- def convert!
15
- logger.info "Start converting all laws"
16
- convert
17
- logger.info "Finish converting all laws"
18
- end
19
-
20
- private
21
-
22
- def logger
23
- @logger ||= Logger.new STDOUT
24
- end
25
-
26
- def convert
27
- Dir.chdir(html_dir) do
28
- Dir.glob(File.join("**", "*.html")) do |path|
29
- convert_html(path)
30
- end
31
- end
32
- end
33
-
34
- def convert_html(path)
35
- dirname = File.dirname(path)
36
- basename = File.basename(path, ".html")
37
- target_dir = File.join(text_dir, dirname)
38
- target_file = File.join(target_dir, "#{basename}.txt")
39
- logger.info "Converting to #{target_file}"
40
-
41
- FileUtils.mkdir_p target_dir
42
- text = Nokogiri::HTML(open(path)).css("body").first.text
43
- File.write(target_file, text)
44
- end
45
- end
@@ -1,75 +0,0 @@
1
- require "law/japan/e_gov"
2
- require "mechanize"
3
-
4
- class Law::Japan::EGov::Downloader
5
- module Mechanize::Form::Clearable
6
- refine Mechanize::Form do
7
- def clear_buttons
8
- @clicked_buttons = []
9
- end
10
- end
11
- end
12
- using Mechanize::Form::Clearable
13
-
14
- attr_reader :root_dir
15
- def initialize(root_dir)
16
- @root_dir = root_dir
17
- end
18
-
19
- def download!
20
- logger.info "Start downloading all laws"
21
- download
22
- logger.info "Finish downloading all laws"
23
- end
24
-
25
- private
26
-
27
- def logger
28
- @logger ||= Logger.new STDOUT
29
- end
30
-
31
- def agent
32
- unless @agent
33
- @agent = Mechanize.new { |a| a.user_agent_alias = "Windows IE 9" }
34
- @agent.log = Logger.new STDOUT
35
- end
36
- @agent
37
- end
38
-
39
- def index_page
40
- @index_page ||= agent.get("http://law.e-gov.go.jp/cgi-bin/idxsearch.cgi")
41
- end
42
-
43
- def category_form
44
- index_page.forms_with(name: "index")[2]
45
- end
46
-
47
- def download
48
- category_form.buttons.each do |button|
49
- category_name = button.node.next.text.gsub(/[  ]+/, "")
50
-
51
- category_form.clear_buttons
52
- list_page = agent.submit(category_form, button)
53
- sleep 1
54
-
55
- list_page.links.each do |link|
56
- law_name = link.text
57
- h_file_name = CGI.parse(link.uri.query)["H_FILE_NAME"].first
58
- if h_file_name =~ /^([MTSH]\d{2})/
59
- law_url = "http://law.e-gov.go.jp/htmldata/#{$1}/#{h_file_name}.html"
60
- law_file = File.join(root_dir, category_name, "#{h_file_name}.html")
61
- if File.exists? law_file
62
- logger.info "File already exists for #{law_name} (#{law_file})"
63
- else
64
- logger.info "Start downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
65
- agent.download(law_url, law_file)
66
- logger.info "Finish downloading for #{law_name} (url: #{law_url}, file: #{law_file})"
67
- sleep 2
68
- end
69
- else
70
- logger.warn "Invalid H_FILE_NAME #{h_file_name} for #{law_file}"
71
- end
72
- end
73
- end
74
- end
75
- end