pdf-reader-find_text 0.1.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19766f6210ef79a6a29a22f6c0f501c8acc2cb18d539511a8fd163962735264f
4
- data.tar.gz: 101ced07a4294793b2f7b8addb54e7df2f25a254fc5c88e0c5f6f2c13dba6453
3
+ metadata.gz: 3ca302a11748be3c14295a72be6e3e231f1bce93cf92b4eb9a0cec8ff022c1f9
4
+ data.tar.gz: 79bea9f5a62553f91106ad7d47a93d56fd602b86729a911eaf06ec88c39817dc
5
5
  SHA512:
6
- metadata.gz: 27d305f423497fbf1bdb14be0eb777250bd36e6b16f673609e3d193fd9ca317eb7e03b8669448c8987c39bfd15ccabeedc8f823fad654567da08b960d5e0a787
7
- data.tar.gz: 4ac4586a856edc0202b6ce60819de435b4786c369a1ab3e022ccf5907188184b7d2b89b19fd027a6bc8495e81ea0aa41f6761c5881531bf778ebf0710bf3d0a1
6
+ metadata.gz: fab73686e49d78ae4eaceb14cbb84926093c2c54eb088e072b6bab66794d0c5d1a5042bac82d2cf30f1254cb27d740d846685db54dc3b8d578e5ddb0b325cd3d
7
+ data.tar.gz: e89dc5f098da69fec703ccc9b6d2aae92c5ff25d2b29b6ef90bd3081e8303626aad9e05db5f535a4f2c77fd4158ac214048ba51e82d8d6d594f29e6b70f2d121
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.2] - 2022-02-24
4
+
5
+ - Handle strings with spaces in a better way
6
+
7
+ ## [0.1.1] - 2022-02-23
8
+
9
+ - Remove spaces before looking for text
10
+
3
11
  ## [0.1.0] - 2022-02-23
4
12
 
5
13
  - Initial release
@@ -3,7 +3,7 @@
3
3
  module PDF
4
4
  class Reader
5
5
  module FindText
6
- VERSION = "0.1.1"
6
+ VERSION = "1.0.0"
7
7
  end
8
8
  end
9
9
  end
@@ -5,14 +5,22 @@ require "pdf-reader"
5
5
  require_relative "find_text/version"
6
6
 
7
7
  module PDF::Reader::FindText
8
- def find_text(text)
9
- text = text.tr(' ', '')
8
+ def find_text(value)
9
+ runs(merge: false).each_cons(value.tr(' ', '').size).map do |chars|
10
+ string = merge_runs_with_max_length(chars, value.size)
11
+ string if string.text[0, value.size] == value
12
+ end.compact
13
+ end
14
+
15
+ private
10
16
 
11
- runs(merge: false).each_cons(text.size).select do |r|
12
- r.map(&:text).join == text
13
- end.map do |r|
14
- PDF::Reader::TextRun.new r.first.x, r.first.y,
15
- r.sum(&:width), r.map(&:font_size).max, r.map(&:text).join
17
+ def merge_runs_with_max_length(chars, length)
18
+ chars.inject do |string, char|
19
+ if string.mergable?(char) && string.text.size < length
20
+ string + char
21
+ else
22
+ string
23
+ end
16
24
  end
17
25
  end
18
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader-find_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Guillaume Dott
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-23 00:00:00.000000000 Z
11
+ date: 2022-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 2.9.2
19
+ version: '2.9'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 2.9.2
26
+ version: '2.9'
27
27
  description: PDF::Reader extension to find text in PDF and get the page and position
28
28
  email:
29
29
  - guillaume+github@dott.fr
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  - !ruby/object:Gem::Version
62
62
  version: '0'
63
63
  requirements: []
64
- rubygems_version: 3.3.5
64
+ rubygems_version: 3.3.25
65
65
  signing_key:
66
66
  specification_version: 4
67
67
  summary: PDF::Reader extension to find text in PDF