RubyGems - act_as_page_extractor - Versions diffs - 0.6.1 → 0.6.2 - Mend

act_as_page_extractor 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +80 -71
data/act_as_page_extractor.gemspec +1 -1
data/lib/act_as_page_extractor/modules/tools.rb +4 -4
data/lib/act_as_page_extractor/version.rb +1 -1
data/spec/spec_helper.rb +6 -2
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: '0956a6daefbfb71e23d34ba59b5010fee87805166d419725c8d95e116ec54ef7'
-  data.tar.gz: '096d66ce3473ab91068d35a3b5c00de975ce186093735e59592fdf9863b36aee'
+  metadata.gz: d0bd64f8e12d0c7bb3a75893738e30af616e4bcc5b958b18853b35363823b5ef
+  data.tar.gz: d87505b025bd924a545e2f6cbd9071958d65f993a234092f65b3cb7e108b16b1
 SHA512:
-  metadata.gz: d4c48c1bbcdac244047230e7144504d38b453b4d05f72093dec848b733950522b0498c013be62796ef4cba0af005399aa2d6987cd6ff1c2e9fc249758f8b45e7
-  data.tar.gz: 136917cec986faeb2ad866bc63bad36a96cdd76e7d129b146c79589a67f9dee69a0f6521c4c958024e54ffcbc441332b2ec89b7d986376becddbefbbebd8efb2
+  metadata.gz: af0708407f3b4546424e1666926c248cdb9fe0813ede2dd642d099836282d2f608d8d47edcd5cd513cef9b3ead231c192f6b815ec7721eb141b6820f561d0f30
+  data.tar.gz: 6a5969118ff6a6141aaaec8989e38670f75a817afa230636ed84f9f2a4e7c1f160ee569664f940e1355cfc74dff56e30370ca993fbb38d2a7139c17f56858acf

data/README.md CHANGED Viewed

@@ -7,102 +7,111 @@ Library for extracting plain text from documents(files) for further processing (
 Install appropriate tools before using:
-    sudo apt-get install zlib1g zlib1g-dev zip rar p7zip-full
+```sh
+sudo apt-get install zlib1g zlib1g-dev zip rar p7zip-full
+```
 Add this line to your application's Gemfile:
-    gem 'act_as_page_extractor'
+```rb
+gem 'act_as_page_extractor'
+bundle
+```
 ## Usage
-For example, for model Document we need execute:
+For example, for the Document model in a Rails application we need to run:
-    $ bundle
-    $ rails g act_as_page_extractor:migration Document category_id user_id
+```sh
+rails g act_as_page_extractor:migration Document category_id user_id
+```
 As a result we get two migration files:
-    class AddPageExtractorFields < ActiveRecord::Migration
-      def change
-        add_column :documents, :page_extraction_state, :string, default: ''
-        add_column :documents, :page_extraction_pages, :integer, default: 0
-        add_column :documents, :page_extraction_doctype, :string, default: ''
-        add_column :documents, :page_extraction_filesize, :string, default: ''
-      end
-    end
-    class CreateExtractedPages < ActiveRecord::Migration
-      def change
-        create_table :extracted_pages do |t|
-          t.text :page
-          t.integer :document_id
-          t.integer :category_id
-          t.integer :user_id
-          t.integer :page_number
-          t.timestamps null: false
-        end
-        add_index :extracted_pages, :document_id
-        add_index :extracted_pages, :category_id
-        add_index :extracted_pages, [:document_id, :category_id]
-        add_index :extracted_pages, [:document_id, :page_number]
-      end
+```rb
+class AddPageExtractorFields < ActiveRecord::Migration
+  def change
+    add_column :documents, :page_extraction_state, :string, default: ''
+    add_column :documents, :page_extraction_pages, :integer, default: 0
+    add_column :documents, :page_extraction_doctype, :string, default: ''
+    add_column :documents, :page_extraction_filesize, :string, default: ''
+  end
+end
+class CreateExtractedPages < ActiveRecord::Migration
+  def change
+    create_table :extracted_pages do |t|
+      t.text :page
+      t.integer :document_id
+      t.integer :category_id
+      t.integer :user_id
+      t.integer :page_number
+      t.timestamps null: false
     end
+    add_index :extracted_pages, :document_id
+    add_index :extracted_pages, :category_id
+    add_index :extracted_pages, [:document_id, :category_id]
+    add_index :extracted_pages, [:document_id, :page_number]
+  end
+end
+```
-Model Document must have field which contains path to file(supports [different archive types](https://github.com/phlowerteam/total_compressor) that contains [txt, pdf, doc/x, txt, html, rtf, ...](https://www.exoplatform.com/docs/public/index.jsp?topic=%2FPLF43%2FPLFAdminGuide.Configuration.JODConverter.html))
+The Document model must have a field that contains the path to a file (supports [different archive types](https://github.com/phlowerteam/total_compressor) that contain [txt, pdf, doc/x, html, rtf, ...](https://docs-old.exoplatform.org/public/index.jsp?topic=%2FPLF41%2FPLFAdminGuide.Configuration.JODConverter.html))
 Add the following parameters to the model for initialization:
-        class Document < ActiveRecord::Base
-          include ActAsPageExtractor
+```rb
+  class Document < ActiveRecord::Base
+    include ActAsPageExtractor
-          act_as_page_extractor options: {
-            document_class:    'Document',
-            save_as_pdf:       true,
-            filename:          :filename,
-            document_id:       :document_id,
-            additional_fields: [:category_id, :user_id],
-            #file_storage:      "/full/path/to/documents/storage",
-            #pdf_storage:       "/full/path/to/extracted/pdf/storage"
-          }
+    act_as_page_extractor options: {
+      document_class:    'Document',
+      save_as_pdf:       true,
+      filename:          :filename,
+      document_id:       :document_id,
+      additional_fields: [:category_id, :user_id],
+      #file_storage:      "/full/path/to/documents/storage",
+      #pdf_storage:       "/full/path/to/extracted/pdf/storage"
+    }
-          has_many :extracted_pages, dependent: :destroy
-      end
+    has_many :extracted_pages, dependent: :destroy
+end
+```
 Now our instance has a few new methods:
-    document = Document.first
-    document.page_extract!
-    document.extracted_pages
-    document.pdf_path # if option 'save_as_pdf' is 'true'
+```rb
+document = Document.first
+document.page_extract!
+document.extracted_pages
+document.pdf_path # if option 'save_as_pdf' is 'true'
-    # Access to pages
-    ExtractedPage.count
+# Access to pages
+ExtractedPage.count
-    # Importing whole directory of documents
-    ActAsPageExtractor.import_files('/path/to/foler/with/documents')
+# Importing whole directory of documents
+ActAsPageExtractor.import_files('/path/to/folder/with/documents')
-    # We can use cron for run the processing of all the new documents
-    ActAsPageExtractor.start_extraction
+# We can use cron to run the processing of all the new documents
+ActAsPageExtractor.start_extraction
-    # Getting statistics information of all documents
-    ActAsPageExtractor.statistics
+# Getting statistics information of all documents
+ActAsPageExtractor.statistics
+```
-Parameters of initializing `act_as_page_extractor options: { ... }`:
+Initialization parameters of **act_as_page_extractor**:
-`document_class` - name of model (e.g. 'Document)
-`save_as_pdf` - boolean [true, false] when we want save temporary pdf
-`filename` - name of field which contains access to the file and it should be an object with 'url' method that returns path to file (e.g. CarrierWave object with 'filename.url')
-`document_id` - name for saving id
-`additional_fields` - additional fields that added to extracted page (e.g. for indexing, etc.)
-`file_storage` - path for saving tmp files (by default it is "public")
-`pdf_storage` - path for saving pdf (by default it is "public/uploads/extracted/pdf")
+* **document_class** - name of model (e.g. Document)
+* **save_as_pdf** - boolean [true, false]; set to true when we want to save the temporary pdf
+* **filename** - name of the field which gives access to the file; it should be an object with a 'url' method that returns the path to the file (e.g. CarrierWave object with 'filename.url')
+* **document_id** - name for saving id
+* **additional_fields** - additional fields that are added to each extracted page (e.g. for indexing, etc.)
+* **file_storage** - path for saving tmp files (by default it is "public")
+* **pdf_storage** - path for saving pdf (by default it is "public/uploads/extracted/pdf")
 ## Run tests
-    $ COVERAGE=true rspec
+```sh
+rspec
+```
 ## Contributing
 1. Fork it
 2. Create your feature branch (`git checkout -b my-new-feature`)
@@ -115,5 +124,5 @@ https://github.com/phlowerteam
 phlowerteam@gmail.com
 ## License
-Copyright (c) 2017 PhlowerTeam
+Copyright (c) 2024 PhlowerTeam
 MIT License

data/act_as_page_extractor.gemspec CHANGED Viewed

@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
   spec.email         = ['phlowerteam@gmail.com']
   spec.description   = %q{Library (Docsplit wrapper) for text extraction from pdf, doc/x, txt files with OpenOffice}
   spec.summary       = %q{Uses system calls}
-  spec.homepage      = 'https://github.com/phlowerteam'
+  spec.homepage      = 'https://github.com/phlowerteam/act_as_page_extractor'
   spec.license       = 'MIT'
   spec.files         = `git ls-files`.split($/)

data/lib/act_as_page_extractor/modules/tools.rb CHANGED Viewed

@@ -1,17 +1,17 @@
 require 'timeout'
 module ActAsPageExtractor
+  # :nocov:
   def timeout_wrapper
     result = nil
     begin
       result = Timeout::timeout(60*5) { yield }
     rescue
-    # :nocov:
     ensure
-    # :nocov:
       result
     end
   end
+  # :nocov:
   def is_extracted
     @pdf_pages.to_i > 0 && self.extracted_pages.count == @pdf_pages
@@ -46,11 +46,11 @@ module ActAsPageExtractor
     # ap "@copy_document_path"
     # ap @copy_document_path
     # ap "@document_path"
-    ap @document_path
+      # ap @document_path
     # ap "@pdf_path"
     # ap @pdf_path
     # ap "@pdf_pages"
-    ap @pdf_pages
+      # ap @pdf_pages
   end
   # :nocov:
 end

data/lib/act_as_page_extractor/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # :nocov:
 module ActAsPageExtractor
-  VERSION = "0.6.1"
+  VERSION = "0.6.2"
 end
 # :nocov:

data/spec/spec_helper.rb CHANGED Viewed

@@ -1,7 +1,11 @@
-if ENV['COVERAGE']
+unless ENV['SKIP_COVERAGE']
   require 'simplecov'
-  SimpleCov.start 'rails'
+  SimpleCov.start 'rails' do
+    add_filter 'vendor'
+  end
+  SimpleCov.minimum_coverage 100
 end
 require 'rspec'
 require 'support/models'
 require 'act_as_page_extractor'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: act_as_page_extractor
 version: !ruby/object:Gem::Version
-  version: 0.6.1
+  version: 0.6.2
 platform: ruby
 authors:
 - PhlowerTeam
@@ -249,7 +249,7 @@ files:
 - test/test-doc-3-pages.rtf
 - test/test-doc-3-pages.txt
 - test/test-doc-3-pages.wrong
-homepage: https://github.com/phlowerteam
+homepage: https://github.com/phlowerteam/act_as_page_extractor
 licenses:
 - MIT
 metadata: {}