RubyGems - acro_that - Versions diffs - 0.1.0 - Mend

acro_that 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +7 -0
data/.DS_Store +0 -0
data/.gitignore +8 -0
data/.rubocop.yml +78 -0
data/Gemfile +5 -0
data/Gemfile.lock +86 -0
data/README.md +360 -0
data/Rakefile +18 -0
data/acro_that.gemspec +34 -0
data/docs/README.md +99 -0
data/docs/dict_scan_explained.md +341 -0
data/docs/object_streams.md +311 -0
data/docs/pdf_structure.md +251 -0
data/lib/acro_that/actions/add_field.rb +278 -0
data/lib/acro_that/actions/add_signature_appearance.rb +422 -0
data/lib/acro_that/actions/base.rb +44 -0
data/lib/acro_that/actions/remove_field.rb +158 -0
data/lib/acro_that/actions/update_field.rb +301 -0
data/lib/acro_that/dict_scan.rb +413 -0
data/lib/acro_that/document.rb +331 -0
data/lib/acro_that/field.rb +143 -0
data/lib/acro_that/incremental_writer.rb +244 -0
data/lib/acro_that/object_resolver.rb +376 -0
data/lib/acro_that/objstm.rb +75 -0
data/lib/acro_that/pdf_writer.rb +97 -0
data/lib/acro_that/version.rb +5 -0
data/lib/acro_that.rb +24 -0
metadata +143 -0

data/lib/acro_that/document.rb ADDED Viewed

@@ -0,0 +1,331 @@
# frozen_string_literal: true

require "stringio"

module AcroThat
  # Loads a PDF into memory, lists and edits its AcroForm fields, and writes
  # the result back out as an incremental update or as a flattened file.
  class Document
    # Matches a `/Type /Page` entry (whitespace optional) while rejecting the
    # `/Type /Pages` page-tree node, which is not a page itself.
    PAGE_TYPE_PATTERN = %r{/Type\s*/Page(?![A-Za-z])}.freeze
    # Numeric tokens inside a /Rect array.
    RECT_NUMBER_PATTERN = /[-+]?\d*\.?\d+/.freeze
    private_constant :PAGE_TYPE_PATTERN, :RECT_NUMBER_PATTERN

    # @return [String, nil] the path the document was loaded from, or nil
    #   when it was built from an IO object
    attr_reader :path

    # Flatten a PDF to remove incremental updates.
    #
    # @param input_path [String, IO] source accepted by {#initialize}
    # @param output_path [String, nil] where to write the flattened bytes
    # @return [String, Document] +output_path+ when one was given, otherwise
    #   a new Document wrapping the flattened bytes
    def self.flatten_pdf(input_path, output_path = nil)
      output = new(input_path).flatten
      return new(StringIO.new(output)) unless output_path

      File.binwrite(output_path, output)
      output_path
    end

    # @param path_or_io [String, IO] a file path, or an object responding to
    #   +binmode+ and +read+
    def initialize(path_or_io)
      @path = path_or_io.is_a?(String) ? path_or_io : nil
      @raw = case path_or_io
             when String then File.binread(path_or_io)
             else
               path_or_io.binmode
               path_or_io.read
             end
      @resolver = AcroThat::ObjectResolver.new(@raw)
      @patches = []
    end

    # Flatten this document to remove incremental updates.
    #
    # Every object with a body is rewritten in ascending object-number order,
    # followed by a fresh xref table and trailer.
    #
    # @return [Object] whatever PDFWriter#output returns (the flattened bytes)
    # @raise [RuntimeError] when the resolver reports no /Root reference
    def flatten
      root_ref = @resolver.root_ref
      raise "Cannot flatten: no /Root found" unless root_ref

      objects = []
      @resolver.each_object do |ref, body|
        objects << { ref: ref, body: body } if body
      end
      objects.sort_by! { |obj| obj[:ref][0] }

      writer = PDFWriter.new
      writer.write_header
      objects.each { |obj| writer.write_object(obj[:ref], obj[:body]) }
      writer.write_xref

      # /Info is optional; it is carried over only when the trailer names it.
      info_ref = indirect_ref_for("/Info", @resolver.trailer_dict)
      max_obj_num = objects.map { |obj| obj[:ref][0] }.max || 0
      writer.write_trailer(max_obj_num + 1, root_ref, info_ref)
      writer.output
    end

    # Flatten this document in-place (mutates current instance).
    # Any queued patches are discarded.
    #
    # @return [Document] self
    def flatten!
      flattened_content = flatten
      @raw = flattened_content
      @resolver = AcroThat::ObjectResolver.new(flattened_content)
      @patches = []
      self
    end

    # Return an array of Field(name, value, type, ref), one per field name.
    #
    # Widget geometry is collected first so that field objects without their
    # own /Rect can borrow the position of a widget that points at them via
    # /Parent or shares their name. When the resolver yields no fields at
    # all, the raw bytes are scanned dictionary by dictionary and the
    # resulting fields carry the placeholder ref [-1, 0].
    #
    # @return [Array<Field>]
    def list_fields
      # The page index needs a full object scan, so it is built at most once
      # per call and only when some widget actually carries a /P entry.
      page_numbers = nil
      page_of = ->(page_ref) { (page_numbers ||= page_number_index)[page_ref] }

      field_widgets = {}
      widgets_by_name = {}

      # First pass: collect widget information
      @resolver.each_object do |_ref, body|
        next unless body && DictScan.is_widget?(body)

        widget_info = widget_geometry(body, page_of)
        next unless widget_info

        parent_ref = indirect_ref_for("/Parent", body)
        (field_widgets[parent_ref] ||= []) << widget_info if parent_ref

        widget_name = field_name_from(body)
        (widgets_by_name[widget_name] ||= []) << widget_info if widget_name
      end

      # Second pass: collect all fields (both field objects and widget
      # annotations with /T)
      fields = []
      @resolver.each_object do |ref, body|
        next unless body && field_candidate?(body)

        name = field_name_from(body)
        next unless name # Skip fields with empty names (deleted fields)

        position =
          if DictScan.is_widget?(body)
            widget_geometry(body, page_of) || {}
          else
            widget = (field_widgets[ref] || widgets_by_name[name])&.first
            widget ? widget.dup : {}
          end
        fields << new_field(name, body, ref, position)
      end

      if fields.empty?
        stripped = DictScan.strip_stream_bodies(@raw)
        DictScan.each_dictionary(stripped) do |dict_src|
          next unless field_candidate?(dict_src)

          name = field_name_from(dict_src)
          next unless name # Skip fields with empty names (deleted fields)

          fields << new_field(name, dict_src, [-1, 0])
        end
      end

      # Several objects may share a name; keep the lowest object number.
      fields.group_by(&:name).values.map { |arr| arr.min_by { |f| f.ref[0] } }
    end

    # Add a new field to the AcroForm /Fields array.
    #
    # @param name [String] field name
    # @param options [Hash] forwarded to Actions::AddField; :x, :y, :width,
    #   :height and :page also seed the returned Field's position and default
    #   to 100, 500, 100, 20 and 1
    # @return [Field, nil] the new field, or nil when the action reports
    #   failure
    def add_field(name, options = {})
      action = Actions::AddField.new(self, name, options)
      return nil unless action.call

      position = {
        x: options[:x] || 100,
        y: options[:y] || 500,
        width: options[:width] || 100,
        height: options[:height] || 20,
        page: options[:page] || 1
      }
      Field.new(name, action.field_value, action.field_type, [action.field_obj_num, 0], self, position)
    end

    # Update field by name, setting /V and optionally /AS on widgets.
    #
    # Fields already present in the document are looked up first; failing
    # that, fields that only exist as queued patches are considered.
    #
    # @param name [String] current field name
    # @param new_value [Object] value handed to Field#update
    # @param new_name [String, nil] optional replacement name
    # @return [Object] false when no such field exists, otherwise the result
    #   of Field#update
    def update_field(name, new_value, new_name: nil)
      field = list_fields.find { |f| f.name == name } || pending_field(name)
      return false unless field

      field.update(new_value, new_name: new_name)
    end

    # Remove field by name from the AcroForm /Fields array.
    #
    # @param fld [Field, String] a Field, or the name of one
    # @return [Object] false when no such field exists, otherwise the result
    #   of Field#remove
    def remove_field(fld)
      field = fld.is_a?(Field) ? fld : list_fields.find { |f| f.name == fld }
      return false unless field

      field.remove
    end

    # Write out with an incremental update.
    #
    # When several queued patches target the same ref only the most recent
    # one is written. The in-memory document is refreshed from the rendered
    # bytes and the patch queue is emptied.
    #
    # @param path_out [String, nil] destination file
    # @param flatten [Boolean] flatten the document after applying patches
    # @return [true, String] true when +path_out+ was written, otherwise the
    #   rendered bytes
    def write(path_out = nil, flatten: false)
      deduped_patches = @patches.reverse.uniq { |patch| patch[:ref] }.reverse
      @raw = AcroThat::IncrementalWriter.new(@raw, deduped_patches).render
      @patches = []
      @resolver = AcroThat::ObjectResolver.new(@raw)
      flatten! if flatten
      return @raw unless path_out

      File.binwrite(path_out, @raw)
      true
    end

    private

    # 1-based page number for +page_ref+, or nil when the ref is not a page.
    def find_page_number_for_ref(page_ref)
      page_number_index[page_ref]
    end

    # Maps each page object's ref to its 1-based position among the page
    # objects yielded by the resolver.
    # NOTE(review): numbering follows the resolver's enumeration order, not
    # the /Kids order of the page tree; confirm the two agree.
    def page_number_index
      index = {}
      count = 0
      @resolver.each_object do |ref, body|
        next unless body&.match?(PAGE_TYPE_PATTERN)

        count += 1
        index[ref] ||= count
      end
      index
    end

    # Smallest object number not used by the document or by queued patches.
    def next_fresh_object_number
      max_obj_num = 0
      @resolver.each_object do |ref, _|
        max_obj_num = [max_obj_num, ref[0]].max
      end
      @patches.each do |patch|
        max_obj_num = [max_obj_num, patch[:ref][0]].max
      end
      max_obj_num + 1
    end

    # Ref of the /AcroForm dictionary named by the catalog, or nil when it
    # is not given as an indirect reference.
    def acroform_ref
      root_ref = @resolver.root_ref
      return nil unless root_ref

      indirect_ref_for("/AcroForm", @resolver.object_body(root_ref))
    end

    # Parses "<key> N G R" out of +source+ and returns [N, G], or nil.
    # Digits are read as decimal; Integer() would treat a leading zero as
    # an octal prefix.
    def indirect_ref_for(key, source)
      match = source&.match(/#{Regexp.escape(key)}\s+(\d+)\s+(\d+)\s+R/)
      match && [match[1].to_i, match[2].to_i]
    end

    # Position hash (:x, :y, :width, :height, :page) for a widget, or nil
    # when its /Rect is missing or does not hold exactly four numbers.
    def widget_geometry(body, page_of)
      rect_tok = DictScan.value_token_after("/Rect", body)
      return nil unless rect_tok&.start_with?("[")

      # Parse [x y x+width y+height] format
      rect_values = rect_tok.scan(RECT_NUMBER_PATTERN).map(&:to_f)
      return nil unless rect_values.length == 4

      x, y, x2, y2 = rect_values
      page_ref = indirect_ref_for("/P", body)
      { x: x, y: y, width: x2 - x, height: y2 - y, page: page_ref && page_of.call(page_ref) }
    end

    # True when the dictionary has a /T entry plus at least one hint that it
    # belongs to the field tree.
    def field_candidate?(dict_src)
      return false unless dict_src.include?("/T")

      dict_src.include?("/FT") || DictScan.is_widget?(dict_src) ||
        dict_src.include?("/Kids") || dict_src.include?("/Parent")
    end

    # Decoded /T value, or nil when it is absent or empty.
    def field_name_from(dict_src)
      return nil unless dict_src.include?("/T")

      token = DictScan.value_token_after("/T", dict_src)
      return nil unless token

      name = DictScan.decode_pdf_string(token)
      name unless name.nil? || name.empty?
    end

    # Builds a Field from a dictionary's /V and /FT entries. A /V that opens
    # a dictionary ("<<") is reported as nil.
    def new_field(name, dict_src, ref, position = {})
      v_tok = dict_src.include?("/V") ? DictScan.value_token_after("/V", dict_src) : nil
      value = v_tok && v_tok != "<<" ? DictScan.decode_pdf_string(v_tok) : nil
      ft_tok = dict_src.include?("/FT") ? DictScan.value_token_after("/FT", dict_src) : nil
      Field.new(name, value, ft_tok, ref, self, position)
    end

    # Field for a not-yet-written patch named +name+; nil when no queued
    # patch matches or the matching patch has no /FT entry.
    def pending_field(name)
      patch = @patches.find do |candidate|
        body = candidate[:body]
        next false unless body&.include?("/T")

        token = DictScan.value_token_after("/T", body)
        token && DictScan.decode_pdf_string(token) == name
      end
      return nil unless patch && patch[:body].include?("/FT")

      ft_tok = DictScan.value_token_after("/FT", patch[:body])
      ft_tok && Field.new(name, nil, ft_tok, patch[:ref], self, {})
    end
  end
end

data/lib/acro_that/field.rb ADDED Viewed

@@ -0,0 +1,143 @@
# frozen_string_literal: true

module AcroThat
  # Represents a PDF form field: its name, value, /FT type, object reference
  # and, when known, the rectangle and page of its widget.
  class Field
    attr_accessor :value
    attr_reader :name, :type, :ref, :x, :y, :width, :height, :page

    TYPES = {
      text: "/Tx",
      button: "/Btn",
      choice: "/Ch",
      signature: "/Sig"
    }.freeze

    # Reverse lookup: map type strings to symbol keys
    TYPE_KEYS = TYPES.invert.freeze

    # @param name [String] field name
    # @param value [Object] current value, or nil
    # @param type [Symbol, String, nil] a TYPES key or a raw type string;
    #   unknown symbols and blank values fall back to "/Tx"
    # @param ref [Array<Integer>] [object number, generation]; [-1, 0] marks
    #   a field whose object could not be identified
    # @param document [Object, nil] owner used by #update and #remove
    # @param position [Hash, nil] optional :x, :y, :width, :height, :page;
    #   nil is treated as "position unknown"
    def initialize(name, value, type, ref, document = nil, position = {})
      @name = name
      @value = value
      @type = normalize_type(type)
      @ref = ref
      @document = document
      position ||= {}
      @x = position[:x]
      @y = position[:y]
      @width = position[:width]
      @height = position[:height]
      @page = position[:page]
    end

    # Check if this is a text field
    def text_field?
      type == "/Tx"
    end

    # Check if this is a button field (checkbox/radio)
    def button_field?
      type == "/Btn"
    end

    # Check if this is a choice field (dropdown/list)
    def choice_field?
      type == "/Ch"
    end

    # Check if this is a signature field
    def signature_field?
      type == "/Sig"
    end

    # Check if the field has a value (neither nil nor an empty string form)
    def has_value?
      !value.nil? && !value.to_s.empty?
    end

    # Get the object number (first element of ref)
    def object_number
      ref[0]
    end

    # Get the generation number (second element of ref)
    def generation
      ref[1]
    end

    # Check if field reference is valid (not [-1, 0] placeholder)
    def valid_ref?
      ref != [-1, 0]
    end

    # Equality comparison on name, value, type and ref; position and owning
    # document are not compared.
    def ==(other)
      return false unless other.is_a?(Field)

      name == other.name &&
        value == other.value &&
        type == other.type &&
        ref == other.ref
    end

    # String representation for debugging
    def to_s
      type_str = type.inspect
      type_str += " (:#{type_key})" if type_key
      pos_str = has_position? ? " x=#{x} y=#{y} w=#{width} h=#{height}" : " position=(unknown)"
      page_str = page ? " page=#{page}" : ""
      "#<AcroThat::Field name=#{name.inspect} type=#{type_str} value=#{value.inspect} ref=#{ref.inspect}#{pos_str}#{page_str}>"
    end
    alias inspect to_s

    # Check if position is known
    def has_position?
      !x.nil? && !y.nil? && !width.nil? && !height.nil?
    end

    # Get the symbol key for the field type (e.g., :text for "/Tx")
    # Returns nil if the type is not in the TYPES mapping
    def type_key
      TYPE_KEYS[type]
    end

    # Update this field's value and optionally rename it in the document.
    # Returns false without doing anything when the field has no owning
    # document or only a placeholder ref; otherwise returns the result of
    # Actions::UpdateField#call. Local name/value change only on success.
    def update(new_value, new_name: nil)
      return false unless @document
      return false unless valid_ref?

      result = Actions::UpdateField.new(@document, @name, new_value, new_name: new_name).call
      if result
        @value = new_value
        @name = new_name if new_name && !new_name.empty?
      end
      result
    end

    # Remove this field from the AcroForm /Fields array and mark the field object as deleted.
    # Note: This does not purge page /Annots widgets (non-trivial); most viewers will hide the field
    # once it is no longer in the field tree.
    # Returns false when the field has no owning document or only a
    # placeholder ref; otherwise the result of Actions::RemoveField#call.
    def remove
      return false unless @document
      return false unless valid_ref?

      Actions::RemoveField.new(@document, self).call
    end

    private

    # Symbols are looked up in TYPES; anything else is stringified and
    # stripped. Unknown symbols and blank results default to "/Tx".
    def normalize_type(type)
      normalized = type.is_a?(Symbol) ? TYPES[type] || "/Tx" : type.to_s.strip
      normalized.empty? ? "/Tx" : normalized
    end
  end
end