RubyGems - wp2txt - Versions diffs - 1.1.0 → 1.1.2 - Mend

wp2txt 1.1.0 → 1.1.2

Files changed (6)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 62f1e8d6ab1932f3ae3c34fb71930b7e73500c832481dcea6288742c38850a79
-  data.tar.gz: f0ff0a5488b635b828338d41029c5ad191a0c88282c0fa294a9facf2d93c055b
+  metadata.gz: 5e8cda1ff32863bb95b4b314782e46007eabcd784938b4fe33f6660445a37d31
+  data.tar.gz: 805bd29ba8b660e705156bf7a4cc1d006b2f43a2c81c24e86ef210ef1fd1ef16
 SHA512:
-  metadata.gz: 7bca85758e88d53dcd33fe43e83a251624f89329a2cb55ffb97b41141bcf8fe5ace7c48e3b8e49f5aa42f84724247cfe4ad376238a949e9154876d4d07469afe
-  data.tar.gz: de59399d5163afed2947e0802abf2e0365894d566c8a1f11823bc901d4948346e7af47d6fba558387f5af7e1301a6725a51a322ac1cd4810264dc3003e0729e2
+  metadata.gz: fe798d5ab55cefd55f776e4d0f975cb510a7c9c65af348ba216365827266d808b65f9125d9bc50c21cb05349ae71d07a46a80998ffd000bbcaac71b2eed15e45
+  data.tar.gz: 94f8df87a935b52d19f05adca27a01f64787dfa35fed067dc68cc1204b4b0022411f6cb6db1d2c9175987d27113e506f84d1761e4661c7d4f65f934c6ee1647e

data/README.md CHANGED Viewed

@@ -8,6 +8,15 @@ WP2TXT extracts text and category data from Wikipedia dump files (encoded in XML
 ## Changelog
+**April 2023**
+- File split/delete issues fixed
+**January 2023**
+- Bug related to command line arguments fixed
+- Code cleanup introducing Rubocop
 **December 2022**
 - Docker images available via Docker Hub
@@ -93,7 +102,7 @@ Download the latest Wikipedia dump file for the desired language at a URL such a
     https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
-Here, `enwiki` refers to the English Wikipedia. To get the Japanese Wikipedia dump file, for instance, change this to jawiki (Japanese). In doing so, note that there are two instances of `enwiki` in the URL above.
+Here, `enwiki` refers to the English Wikipedia. To get the Japanese Wikipedia dump file, for instance, change this to `jawiki` (Japanese). In doing so, note that there are two instances of `enwiki` in the URL above.
 Alternatively, you can also select Wikipedia dump files created on a specific date from [here](http://dumps.wikimedia.org/backup-index.html). Make sure to download a file named in the following format:
@@ -213,11 +222,11 @@ The author will appreciate your mentioning one of these in your research.
 Or use this BibTeX entry:
 ```
-@misc{wp2txt_2022,
+@misc{wp2txt_2023,
   author = {Yoichiro Hasebe},
   title = {WP2TXT: A command-line toolkit to extract text content and category data from Wikipedia dump files},
   url = {https://github.com/yohasebe/wp2txt},
-  year = {2022}
+  year = {2023}
 }
 ```

data/bin/wp2txt CHANGED Viewed

@@ -3,8 +3,6 @@
 # frozen_string_literal: true
 DEBUG_MODE = false
-SHAREDIR = File.join(File.dirname(__FILE__), "..", "share")
-DOCDIR = File.join(File.dirname(__FILE__), "..", "doc")
 require_relative "../lib/wp2txt"
 require_relative "../lib/wp2txt/utils"

data/lib/wp2txt/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Wp2txt
-  VERSION = "1.1.0"
+  VERSION = "1.1.2"
 end

data/lib/wp2txt.rb CHANGED Viewed

@@ -48,14 +48,19 @@ module Wp2txt
       basename = File.basename(command)
       path = +""
       print "Checking #{basename}: "
-      if open("| which #{command} 2>/dev/null") { |f| path = f.gets.strip }
-        puts "detected [#{path}]"
-        path.strip
-      elsif open("| which #{basename} 2>/dev/null") { |f| path = f.gets.strip }
-        puts "detected [#{path}]"
-        path.strip
-      else
-        puts "not found"
+      begin
+        if open("| which #{command} 2>/dev/null") { |f| path = f.gets.strip }
+          puts "detected [#{path}]"
+          path.strip
+        elsif open("| which #{basename} 2>/dev/null") { |f| path = f.gets.strip }
+          puts "detected [#{path}]"
+          path.strip
+        else
+          puts "#{basename} not found"
+          false
+        end
+      rescue StandardError
+        puts "#{basename} not found"
         false
       end
     end
@@ -69,7 +74,7 @@ module Wp2txt
       if /.bz2$/ =~ @input_file
         if @bz2_gem
           file = Bzip2::Reader.new File.open(@input_file, "r:UTF-8")
-        elsif RUBY_PLATFORM.index("win32")
+        elsif Gem.win_platform?
           file = IO.popen("bunzip2.exe -c #{@input_file}")
         elsif (bzpath = command_exist?("lbzip2") || command_exist?("pbzip2") || command_exist?("bzip2"))
           file = IO.popen("#{bzpath} -c -d #{@input_file}")
@@ -155,7 +160,7 @@ module Wp2txt
       @fp.puts(output_text) if output_text != ""
       @fp.close
-      if File.size(outfilename).zero?
+      if outfilename && File.size(outfilename).zero?
         File.delete(outfilename)
         @outfiles.delete(outfilename)
       end
@@ -292,6 +297,7 @@ module Wp2txt
           @fp.puts(output_text)
           @fp.close
         end
+        @file_pointer.close
         File.delete(@input_file) if @del_interfile
         output_text = +""
       end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wp2txt
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.1.2
 platform: ruby
 authors:
 - Yoichiro Hasebe
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-01-22 00:00:00.000000000 Z
+date: 2023-04-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -208,7 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.1
+rubygems_version: 3.3.3
 signing_key:
 specification_version: 4
 summary: A command-line toolkit to extract text content and category data from Wikipedia