RubyGems - wp2txt - Versions diffs - 1.1.2 → 1.1.3 - Mend

wp2txt 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5e8cda1ff32863bb95b4b314782e46007eabcd784938b4fe33f6660445a37d31
-  data.tar.gz: 805bd29ba8b660e705156bf7a4cc1d006b2f43a2c81c24e86ef210ef1fd1ef16
+  metadata.gz: 62b3ee240d3ee685e3739eb4a8d6f9923677f75a97490d73286d64730b60ae5b
+  data.tar.gz: 1a2b9c3d23266b45d96e0f0984ebbcf9e610aa91adbdb3cc4891a23767cd0315
 SHA512:
-  metadata.gz: fe798d5ab55cefd55f776e4d0f975cb510a7c9c65af348ba216365827266d808b65f9125d9bc50c21cb05349ae71d07a46a80998ffd000bbcaac71b2eed15e45
-  data.tar.gz: 94f8df87a935b52d19f05adca27a01f64787dfa35fed067dc68cc1204b4b0022411f6cb6db1d2c9175987d27113e506f84d1761e4661c7d4f65f934c6ee1647e
+  metadata.gz: 14eb3cb035ac0815e30bcab8d4eaffd6ed84f8b7449f6d9ee2492d9656bbd847a3da8aad00fcb331a8c55a9379ce90ff33a5018f6ab35f1a1046c87c4b57ab17
+  data.tar.gz: 68a4e8bf5952a433be23528078a3fa526a3bcceceb1c3f8a7f79412256c78b9f5732ece782e1eb311711dec86eb9490d5f1596eba12ba15a627ebdae4999c551

data/README.md CHANGED Viewed

@@ -8,6 +8,10 @@ WP2TXT extracts text and category data from Wikipedia dump files (encoded in XML
 ## Changelog
+**May 2023**
+- Problems caused by running too many parallel processes are addressed by capping the number of concurrent processes at 8.
 **April 2023**
 - File split/delete issues fixed
@@ -186,7 +190,7 @@ Command line options are as follows:
       -g, --category-only              Extract only article title and categories
       -s, --summary-only               Extract only article title, categories, and summary text before first heading
       -f, --file-size=<i>              Approximate size (in MB) of each output file (default: 10)
-      -n, --num-procs                  Number of proccesses to be run concurrently (default: max num of available CPU cores minus two)
+      -n, --num-procs                  Number of processes (up to 8) to be run concurrently (default: max num of available CPU cores minus two)
       -x, --del-interfile              Delete intermediate XML files from output dir
       -t, --title, --no-title          Keep page titles in output (default: true)
       -d, --heading, --no-heading      Keep section titles in output (default: true)

data/bin/wp2txt CHANGED Viewed

@@ -3,6 +3,7 @@
 # frozen_string_literal: true
 DEBUG_MODE = false
+MAX_PROCESSORS = 8
 require_relative "../lib/wp2txt"
 require_relative "../lib/wp2txt/utils"
@@ -34,7 +35,7 @@ class WpApp
       opt :category_only, "Extract only article title and categories", default: false, short: "-g"
       opt :summary_only, "Extract only article title, categories, and summary text before first heading", default: false, short: "-s"
       opt :file_size, "Approximate size (in MB) of each output file", default: 10, short: "-f"
-      opt :num_procs, "Number of proccesses to be run concurrently (default: max num of CPU cores minus two)", short: "-n"
+      opt :num_procs, "Number of proccesses (up to #{MAX_PROCESSORS}) to be run concurrently (default: max num of CPU cores minus two)", type: Integer, short: "-n"
       opt :del_interfile, "Delete intermediate XML files from output dir", short: "-x", default: false
       opt :title, "Keep page titles in output", default: true, short: "-t"
       opt :heading, "Keep section titles in output", default: true, short: "-d"
@@ -55,10 +56,11 @@ class WpApp
     output_dir = opts[:output_dir]
     tfile_size = opts[:file_size]
     num_processors = Etc.nprocessors
-    num_processes = if opts[:num_procs] && opts[:num_procs].to_i <= num_processors
+    num_processes = if opts[:num_procs] && opts[:num_procs].to_i <= num_processors && opts[:num_procs].to_i <= MAX_PROCESSORS
                       opts[:num_procs]
                     else
-                      num_processors - 2
+                      minus2 = num_processors - 2
+                      minus2 < MAX_PROCESSORS ? minus2 : MAX_PROCESSORS
                     end
     num_processes = 1 if num_processes < 1

data/lib/wp2txt/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Wp2txt
-  VERSION = "1.1.2"
+  VERSION = "1.1.3"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wp2txt
 version: !ruby/object:Gem::Version
-  version: 1.1.2
+  version: 1.1.3
 platform: ruby
 authors:
 - Yoichiro Hasebe
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-04-15 00:00:00.000000000 Z
+date: 2023-05-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -208,7 +208,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.3
+rubygems_version: 3.4.12
 signing_key:
 specification_version: 4
 summary: A command-line toolkit to extract text content and category data from Wikipedia