wp2txt 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62f1e8d6ab1932f3ae3c34fb71930b7e73500c832481dcea6288742c38850a79
4
- data.tar.gz: f0ff0a5488b635b828338d41029c5ad191a0c88282c0fa294a9facf2d93c055b
3
+ metadata.gz: 0bcba2c84286504ae628176aad55dbdea05889dfaa7f471cf080ae933691cffc
4
+ data.tar.gz: f83c63d7c6e91270da1da2aed54ab6e5c352c5695340ccd6378fdd20c43fc332
5
5
  SHA512:
6
- metadata.gz: 7bca85758e88d53dcd33fe43e83a251624f89329a2cb55ffb97b41141bcf8fe5ace7c48e3b8e49f5aa42f84724247cfe4ad376238a949e9154876d4d07469afe
7
- data.tar.gz: de59399d5163afed2947e0802abf2e0365894d566c8a1f11823bc901d4948346e7af47d6fba558387f5af7e1301a6725a51a322ac1cd4810264dc3003e0729e2
6
+ metadata.gz: ae0eae028a98d4299a0e93278220b991e53e13deb80b88cce2971cd889d769808305e0d3aa8ee4e73af0cc55f07f27c0cc6a9f0d440e4693a410ba7d0a6333ba
7
+ data.tar.gz: 25bff247bf80b4a0b5ff785ed51b60e3e21e6a3ca5e0bfeace9961df5060832ae047c3fa606a3267f9f770c253fd816176372c4d2f7ba73999d5317ea59933e6
data/README.md CHANGED
@@ -8,6 +8,11 @@ WP2TXT extracts text and category data from Wikipedia dump files (encoded in XML
8
8
 
9
9
  ## Changelog
10
10
 
11
+ **January 2023**
12
+
13
+ - Bug related to command line arguments fixed
14
+ - Code cleanup introducing Rubocop
15
+
11
16
  **December 2022**
12
17
 
13
18
  - Docker images available via Docker Hub
@@ -93,7 +98,7 @@ Download the latest Wikipedia dump file for the desired language at a URL such a
93
98
 
94
99
  https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
95
100
 
96
- Here, `enwiki` refers to the English Wikipedia. To get the Japanese Wikipedia dump file, for instance, change this to jawiki (Japanese). In doing so, note that there are two instances of `enwiki` in the URL above.
101
+ Here, `enwiki` refers to the English Wikipedia. To get the Japanese Wikipedia dump file, for instance, change this to `jawiki` (Japanese). In doing so, note that there are two instances of `enwiki` in the URL above.
97
102
 
98
103
  Alternatively, you can also select Wikipedia dump files created on a specific date from [here](http://dumps.wikimedia.org/backup-index.html). Make sure to download a file named in the following format:
99
104
 
@@ -213,11 +218,11 @@ The author will appreciate your mentioning one of these in your research.
213
218
  Or use this BibTeX entry:
214
219
 
215
220
  ```
216
- @misc{wp2txt_2022,
221
+ @misc{wp2txt_2023,
217
222
  author = {Yoichiro Hasebe},
218
223
  title = {WP2TXT: A command-line toolkit to extract text content and category data from Wikipedia dump files},
219
224
  url = {https://github.com/yohasebe/wp2txt},
220
- year = {2022}
225
+ year = {2023}
221
226
  }
222
227
  ```
223
228
 
data/lib/wp2txt/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Wp2txt
4
- VERSION = "1.1.0"
4
+ VERSION = "1.1.1"
5
5
  end
data/lib/wp2txt.rb CHANGED
@@ -48,14 +48,19 @@ module Wp2txt
48
48
  basename = File.basename(command)
49
49
  path = +""
50
50
  print "Checking #{basename}: "
51
- if open("| which #{command} 2>/dev/null") { |f| path = f.gets.strip }
52
- puts "detected [#{path}]"
53
- path.strip
54
- elsif open("| which #{basename} 2>/dev/null") { |f| path = f.gets.strip }
55
- puts "detected [#{path}]"
56
- path.strip
57
- else
58
- puts "not found"
51
+ begin
52
+ if open("| which #{command} 2>/dev/null") { |f| path = f.gets.strip }
53
+ puts "detected [#{path}]"
54
+ path.strip
55
+ elsif open("| which #{basename} 2>/dev/null") { |f| path = f.gets.strip }
56
+ puts "detected [#{path}]"
57
+ path.strip
58
+ else
59
+ puts "#{basename} not found"
60
+ false
61
+ end
62
+ rescue StandardError
63
+ puts "#{basename} not found"
59
64
  false
60
65
  end
61
66
  end
@@ -69,7 +74,7 @@ module Wp2txt
69
74
  if /.bz2$/ =~ @input_file
70
75
  if @bz2_gem
71
76
  file = Bzip2::Reader.new File.open(@input_file, "r:UTF-8")
72
- elsif RUBY_PLATFORM.index("win32")
77
+ elsif Gem.win_platform?
73
78
  file = IO.popen("bunzip2.exe -c #{@input_file}")
74
79
  elsif (bzpath = command_exist?("lbzip2") || command_exist?("pbzip2") || command_exist?("bzip2"))
75
80
  file = IO.popen("#{bzpath} -c -d #{@input_file}")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoichiro Hasebe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-22 00:00:00.000000000 Z
11
+ date: 2023-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -208,7 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
208
208
  - !ruby/object:Gem::Version
209
209
  version: '0'
210
210
  requirements: []
211
- rubygems_version: 3.4.1
211
+ rubygems_version: 3.4.2
212
212
  signing_key:
213
213
  specification_version: 4
214
214
  summary: A command-line toolkit to extract text content and category data from Wikipedia