wp2txt 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62f1e8d6ab1932f3ae3c34fb71930b7e73500c832481dcea6288742c38850a79
4
- data.tar.gz: f0ff0a5488b635b828338d41029c5ad191a0c88282c0fa294a9facf2d93c055b
3
+ metadata.gz: 0bcba2c84286504ae628176aad55dbdea05889dfaa7f471cf080ae933691cffc
4
+ data.tar.gz: f83c63d7c6e91270da1da2aed54ab6e5c352c5695340ccd6378fdd20c43fc332
5
5
  SHA512:
6
- metadata.gz: 7bca85758e88d53dcd33fe43e83a251624f89329a2cb55ffb97b41141bcf8fe5ace7c48e3b8e49f5aa42f84724247cfe4ad376238a949e9154876d4d07469afe
7
- data.tar.gz: de59399d5163afed2947e0802abf2e0365894d566c8a1f11823bc901d4948346e7af47d6fba558387f5af7e1301a6725a51a322ac1cd4810264dc3003e0729e2
6
+ metadata.gz: ae0eae028a98d4299a0e93278220b991e53e13deb80b88cce2971cd889d769808305e0d3aa8ee4e73af0cc55f07f27c0cc6a9f0d440e4693a410ba7d0a6333ba
7
+ data.tar.gz: 25bff247bf80b4a0b5ff785ed51b60e3e21e6a3ca5e0bfeace9961df5060832ae047c3fa606a3267f9f770c253fd816176372c4d2f7ba73999d5317ea59933e6
data/README.md CHANGED
@@ -8,6 +8,11 @@ WP2TXT extracts text and category data from Wikipedia dump files (encoded in XML
8
8
 
9
9
  ## Changelog
10
10
 
11
+ **January 2023**
12
+
13
+ - Bug related to command line arguments fixed
14
+ - Code cleanup introducing Rubocop
15
+
11
16
  **December 2022**
12
17
 
13
18
  - Docker images available via Docker Hub
@@ -93,7 +98,7 @@ Download the latest Wikipedia dump file for the desired language at a URL such a
93
98
 
94
99
  https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
95
100
 
96
- Here, `enwiki` refers to the English Wikipedia. To get the Japanese Wikipedia dump file, for instance, change this to jawiki (Japanese). In doing so, note that there are two instances of `enwiki` in the URL above.
101
+ Here, `enwiki` refers to the English Wikipedia. To get the Japanese Wikipedia dump file, for instance, change this to `jawiki` (Japanese). In doing so, note that there are two instances of `enwiki` in the URL above.
97
102
 
98
103
  Alternatively, you can also select Wikipedia dump files created on a specific date from [here](http://dumps.wikimedia.org/backup-index.html). Make sure to download a file named in the following format:
99
104
 
@@ -213,11 +218,11 @@ The author will appreciate your mentioning one of these in your research.
213
218
  Or use this BibTeX entry:
214
219
 
215
220
  ```
216
- @misc{wp2txt_2022,
221
+ @misc{wp2txt_2023,
217
222
  author = {Yoichiro Hasebe},
218
223
  title = {WP2TXT: A command-line toolkit to extract text content and category data from Wikipedia dump files},
219
224
  url = {https://github.com/yohasebe/wp2txt},
220
- year = {2022}
225
+ year = {2023}
221
226
  }
222
227
  ```
223
228
 
data/lib/wp2txt/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Wp2txt
4
- VERSION = "1.1.0"
4
+ VERSION = "1.1.1"
5
5
  end
data/lib/wp2txt.rb CHANGED
@@ -48,14 +48,19 @@ module Wp2txt
48
48
  basename = File.basename(command)
49
49
  path = +""
50
50
  print "Checking #{basename}: "
51
- if open("| which #{command} 2>/dev/null") { |f| path = f.gets.strip }
52
- puts "detected [#{path}]"
53
- path.strip
54
- elsif open("| which #{basename} 2>/dev/null") { |f| path = f.gets.strip }
55
- puts "detected [#{path}]"
56
- path.strip
57
- else
58
- puts "not found"
51
+ begin
52
+ if open("| which #{command} 2>/dev/null") { |f| path = f.gets.strip }
53
+ puts "detected [#{path}]"
54
+ path.strip
55
+ elsif open("| which #{basename} 2>/dev/null") { |f| path = f.gets.strip }
56
+ puts "detected [#{path}]"
57
+ path.strip
58
+ else
59
+ puts "#{basename} not found"
60
+ false
61
+ end
62
+ rescue StandardError
63
+ puts "#{basename} not found"
59
64
  false
60
65
  end
61
66
  end
@@ -69,7 +74,7 @@ module Wp2txt
69
74
  if /.bz2$/ =~ @input_file
70
75
  if @bz2_gem
71
76
  file = Bzip2::Reader.new File.open(@input_file, "r:UTF-8")
72
- elsif RUBY_PLATFORM.index("win32")
77
+ elsif Gem.win_platform?
73
78
  file = IO.popen("bunzip2.exe -c #{@input_file}")
74
79
  elsif (bzpath = command_exist?("lbzip2") || command_exist?("pbzip2") || command_exist?("bzip2"))
75
80
  file = IO.popen("#{bzpath} -c -d #{@input_file}")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wp2txt
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yoichiro Hasebe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-22 00:00:00.000000000 Z
11
+ date: 2023-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -208,7 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
208
208
  - !ruby/object:Gem::Version
209
209
  version: '0'
210
210
  requirements: []
211
- rubygems_version: 3.4.1
211
+ rubygems_version: 3.4.2
212
212
  signing_key:
213
213
  specification_version: 4
214
214
  summary: A command-line toolkit to extract text content and category data from Wikipedia