pdfbeads 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +14 -1
 - data/bin/pdfbeads +1 -1
 - data/lib/pdfbeads/pdfbuilder.rb +178 -57
 - data/lib/pdfbeads/pdffont.rb +68 -0
 - metadata +4 -4
 
    
        data/ChangeLog
    CHANGED
    
    | 
         @@ -25,4 +25,17 @@ 
     | 
|
| 
       25 
25 
     | 
    
         
             
                  JPEG files were still written with the 'JP2' extension.
         
     | 
| 
       26 
26 
     | 
    
         | 
| 
       27 
27 
     | 
    
         
             
                * Some tweaks to minimize the effect of page labels being inconsistently handled
         
     | 
| 
       28 
     | 
    
         
            -
                  in various PDF viewers (prefer ISO-8859-1 strings if possible).
         
     | 
| 
      
 28 
     | 
    
         
            +
                  in various PDF viewers (prefer ISO-8859-1 strings if possible).
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            2012 February 5 (Alexey Kryukov) Version 1.0.6
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                + Add Greek letters (the monotonic set) to the list of characters with hardcoded
         
     | 
| 
      
 33 
     | 
    
         
            +
                  glyph names and width.
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                * Minor bugs fixed.
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
            2012 February 10 (Alexey Kryukov) Version 1.0.7
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                + An attempt to achieve better positioning of the hidden text layer, taking into
         
     | 
| 
      
 40 
     | 
    
         
            +
                  account not just lines, but also individual words. This should work with hOCR
         
     | 
| 
      
 41 
     | 
    
         
            +
                  files produced with Cuneiform or Tesseract.
         
     | 
    
        data/bin/pdfbeads
    CHANGED
    
    | 
         @@ -144,7 +144,7 @@ OptionParser.new() do |opts| 
     | 
|
| 
       144 
144 
     | 
    
         
             
              opts.on("-b", "--bg-compression FORMAT",
         
     | 
| 
       145 
145 
     | 
    
         
             
                            ['JP2', 'JPX', 'J2K', 'JPEG2000', 'JPG', 'JPEG', 'LOSSLESS', 'PNG', 'DEFLATE'],
         
     | 
| 
       146 
146 
     | 
    
         
             
                            "Compression method for background images. Acceptable",
         
     | 
| 
       147 
     | 
    
         
            -
                            "values are JP2|JPX|JPEG2000, JPG|JPEG or LOSSLESS.",
         
     | 
| 
      
 147 
     | 
    
         
            +
                            "values are JP2|JPX|JPEG2000, JPG|JPEG or PNG|LOSSLESS.",
         
     | 
| 
       148 
148 
     | 
    
         
             
                            "JP2 is used by default, unless this format is not",
         
     | 
| 
       149 
149 
     | 
    
         
             
                            "supported by the available version of ImageMagick" ) do |format|
         
     | 
| 
       150 
150 
     | 
    
         
             
                case format.upcase
         
     | 
    
        data/lib/pdfbeads/pdfbuilder.rb
    CHANGED
    
    | 
         @@ -357,7 +357,7 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       357 
357 
     | 
    
         
             
                  fin.each do |fl|
         
     | 
| 
       358 
358 
     | 
    
         
             
                    next if /^\#/.match( fl )
         
     | 
| 
       359 
359 
     | 
    
         | 
| 
       360 
     | 
    
         
            -
                    if /^\/?([A-Za-z]+)[ 
     | 
| 
      
 360 
     | 
    
         
            +
                    if /^\/?([A-Za-z]+)[ \t]*:[ \t]+\"(.*)\"/.match( fl )
         
     | 
| 
       361 
361 
     | 
    
         
             
                      key = $1
         
     | 
| 
       362 
362 
     | 
    
         
             
                      if keys.include? key
         
     | 
| 
       363 
363 
     | 
    
         
             
                        begin
         
     | 
| 
         @@ -390,8 +390,8 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       390 
390 
     | 
    
         
             
                  end
         
     | 
| 
       391 
391 
     | 
    
         | 
| 
       392 
392 
     | 
    
         
             
                  item_text = item[:title].to_binary
         
     | 
| 
       393 
     | 
    
         
            -
                  item_text.sub!( /\ 
     | 
| 
       394 
     | 
    
         
            -
                  item_text.sub!( /\ 
     | 
| 
      
 393 
     | 
    
         
            +
                  item_text.sub!( /\x28/,"\x5C\x28" )
         
     | 
| 
      
 394 
     | 
    
         
            +
                  item_text.sub!( /\x29/,"\x5C\x29" )
         
     | 
| 
       395 
395 
     | 
    
         
             
                  item[:pdfobj] = XObj.new(Hash[
         
     | 
| 
       396 
396 
     | 
    
         
             
                    'Title'  => "(\xFE\xFF#{item_text.to_text})",
         
     | 
| 
       397 
397 
     | 
    
         
             
                    'Parent' => ref(item[:parent][:pdfobj].getID),
         
     | 
| 
         @@ -442,10 +442,95 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       442 
442 
     | 
    
         
             
                return out
         
     | 
| 
       443 
443 
     | 
    
         
             
              end
         
     | 
| 
       444 
444 
     | 
    
         | 
| 
      
 445 
     | 
    
         
            +
              def elementText( elem,charset )
         
     | 
| 
      
 446 
     | 
    
         
            +
                txt = ''
         
     | 
| 
      
 447 
     | 
    
         
            +
                begin
         
     | 
| 
      
 448 
     | 
    
         
            +
                  txt = elem.to_plain_text.strip
         
     | 
| 
      
 449 
     | 
    
         
            +
                  txt = Iconv.iconv( 'utf-8',charset,txt ).first unless charset.downcase.eql? 'utf-8'
         
     | 
| 
      
 450 
     | 
    
         
            +
                rescue
         
     | 
| 
      
 451 
     | 
    
         
            +
                end
         
     | 
| 
      
 452 
     | 
    
         
            +
             
     | 
| 
      
 453 
     | 
    
         
            +
                txt.force_encoding( 'utf-8' ) if txt.respond_to? :force_encoding
         
     | 
| 
      
 454 
     | 
    
         
            +
                return txt
         
     | 
| 
      
 455 
     | 
    
         
            +
              end
         
     | 
| 
      
 456 
     | 
    
         
            +
             
     | 
| 
      
 457 
     | 
    
         
            +
              def getOCRUnits( ocr_line,lbbox,fsize,charset,xscale,yscale )
         
     | 
| 
      
 458 
     | 
    
         
            +
                units = Array.new()
         
     | 
| 
      
 459 
     | 
    
         
            +
                ocr_words = ocr_line.search("//span[@class='ocrx_word']")
         
     | 
| 
      
 460 
     | 
    
         
            +
                ocr_chars = nil
         
     | 
| 
      
 461 
     | 
    
         
            +
                ocr_chars = ocr_line.at("//span[@class='ocr_cinfo']") if ocr_words.length == 0
         
     | 
| 
      
 462 
     | 
    
         
            +
             
     | 
| 
      
 463 
     | 
    
         
            +
                # If 'ocrx_word' elements are available (as in Tesseract output), split the line
         
     | 
| 
      
 464 
     | 
    
         
            +
                # into individual words
         
     | 
| 
      
 465 
     | 
    
         
            +
                if ocr_words.length > 0
         
     | 
| 
      
 466 
     | 
    
         
            +
                  ocr_words.each do |word|
         
     | 
| 
      
 467 
     | 
    
         
            +
                    bbox = elementCoordinates( word,xscale,yscale )
         
     | 
| 
      
 468 
     | 
    
         
            +
                    txt = elementText( word,charset )
         
     | 
| 
      
 469 
     | 
    
         
            +
                    units << [txt,bbox]
         
     | 
| 
      
 470 
     | 
    
         
            +
                  end
         
     | 
| 
      
 471 
     | 
    
         
            +
             
     | 
| 
      
 472 
     | 
    
         
            +
                # If 'ocr_cinfo' data is available (as in Cuneiform output), then split it
         
     | 
| 
      
 473 
     | 
    
         
            +
                # into individual characters and then combine them into words
         
     | 
| 
      
 474 
     | 
    
         
            +
                elsif not ocr_chars.nil? and ocr_chars.attributes.to_hash.has_key? 'title'
         
     | 
| 
      
 475 
     | 
    
         
            +
                  if /x_bboxes([-\s\d]+)/.match( ocr_chars.attributes.to_hash['title'] )
         
     | 
| 
      
 476 
     | 
    
         
            +
                    coords = $1.strip.split(/\s+/)
         
     | 
| 
      
 477 
     | 
    
         
            +
                    ltxt = elementText( ocr_line,charset )
         
     | 
| 
      
 478 
     | 
    
         
            +
                    charcnt = 0
         
     | 
| 
      
 479 
     | 
    
         
            +
                    ltxt.each_char { |uc| charcnt += 1 }
         
     | 
| 
      
 480 
     | 
    
         
            +
             
     | 
| 
      
 481 
     | 
    
         
            +
                    if charcnt <= coords.length/4
         
     | 
| 
      
 482 
     | 
    
         
            +
                      i = 0
         
     | 
| 
      
 483 
     | 
    
         
            +
                      wtxt = ''
         
     | 
| 
      
 484 
     | 
    
         
            +
                      bbox = [-1,-1,-1,-1]
         
     | 
| 
      
 485 
     | 
    
         
            +
                      ltxt.each_char do |uc|
         
     | 
| 
      
 486 
     | 
    
         
            +
                        cbbox = [ (coords[i*4].to_i*xscale).to_f,(coords[i*4+1].to_i*xscale).to_f,
         
     | 
| 
      
 487 
     | 
    
         
            +
                                  (coords[i*4+2].to_i*yscale).to_f,(coords[i*4+3].to_i*yscale).to_f ]
         
     | 
| 
      
 488 
     | 
    
         
            +
             
     | 
| 
      
 489 
     | 
    
         
            +
                        unless cbbox[0] < 0
         
     | 
| 
      
 490 
     | 
    
         
            +
                          bbox[0] = cbbox[0] if cbbox[0] < bbox[0] or bbox[0] < 0
         
     | 
| 
      
 491 
     | 
    
         
            +
                          bbox[1] = cbbox[1] if cbbox[1] < bbox[1] or bbox[1] < 0
         
     | 
| 
      
 492 
     | 
    
         
            +
                          bbox[2] = cbbox[2] if cbbox[2] > bbox[2] or bbox[2] < 0
         
     | 
| 
      
 493 
     | 
    
         
            +
                          bbox[3] = cbbox[3] if cbbox[3] > bbox[3] or bbox[3] < 0
         
     | 
| 
      
 494 
     | 
    
         
            +
                          wtxt << uc
         
     | 
| 
      
 495 
     | 
    
         
            +
             
     | 
| 
      
 496 
     | 
    
         
            +
                        else
         
     | 
| 
      
 497 
     | 
    
         
            +
                          units << [wtxt,bbox]
         
     | 
| 
      
 498 
     | 
    
         
            +
                          bbox = [-1,-1,-1,-1]
         
     | 
| 
      
 499 
     | 
    
         
            +
                          if /^\s+$/.match( uc )
         
     | 
| 
      
 500 
     | 
    
         
            +
                            wtxt = ''
         
     | 
| 
      
 501 
     | 
    
         
            +
             
     | 
| 
      
 502 
     | 
    
         
            +
                          # A workaround for probable hpricot bug, which sometimes causes whitespace
         
     | 
| 
      
 503 
     | 
    
         
            +
                          # characters from inside a string to be stripped. So if we find
         
     | 
| 
      
 504 
     | 
    
         
            +
                          # a bounding box with negative values we assume there was a whitespace
         
     | 
| 
      
 505 
     | 
    
         
            +
                          # character here, even if not preserved in the string itself
         
     | 
| 
      
 506 
     | 
    
         
            +
                          else
         
     | 
| 
      
 507 
     | 
    
         
            +
                            wtxt = uc
         
     | 
| 
      
 508 
     | 
    
         
            +
                            i += 1
         
     | 
| 
      
 509 
     | 
    
         
            +
                            bbox =  [ (coords[i*4].to_i*xscale).to_f,(coords[i*4+1].to_i*xscale).to_f,
         
     | 
| 
      
 510 
     | 
    
         
            +
                                      (coords[i*4+2].to_i*yscale).to_f,(coords[i*4+3].to_i*yscale).to_f ]
         
     | 
| 
      
 511 
     | 
    
         
            +
                          end
         
     | 
| 
      
 512 
     | 
    
         
            +
                        end
         
     | 
| 
      
 513 
     | 
    
         
            +
                        i += 1
         
     | 
| 
      
 514 
     | 
    
         
            +
                      end
         
     | 
| 
      
 515 
     | 
    
         
            +
                      units << [wtxt,bbox] unless wtxt.eql? ''
         
     | 
| 
      
 516 
     | 
    
         
            +
                    end
         
     | 
| 
      
 517 
     | 
    
         
            +
                  end
         
     | 
| 
      
 518 
     | 
    
         
            +
                end
         
     | 
| 
      
 519 
     | 
    
         
            +
             
     | 
| 
      
 520 
     | 
    
         
            +
                # If neither word nor character bounding boxes are available, then store the line as a whole
         
     | 
| 
      
 521 
     | 
    
         
            +
                if units.length == 0
         
     | 
| 
      
 522 
     | 
    
         
            +
                  ltxt = elementText( ocr_line,charset )
         
     | 
| 
      
 523 
     | 
    
         
            +
                  units << [ltxt,lbbox] unless ltxt.eql? ''
         
     | 
| 
      
 524 
     | 
    
         
            +
                end
         
     | 
| 
      
 525 
     | 
    
         
            +
             
     | 
| 
      
 526 
     | 
    
         
            +
                units[units.length-1][0].sub!( /-\Z/, "\xC2\xAD" ) unless units.length == 0
         
     | 
| 
      
 527 
     | 
    
         
            +
                return units
         
     | 
| 
      
 528 
     | 
    
         
            +
              end
         
     | 
| 
      
 529 
     | 
    
         
            +
             
     | 
| 
       445 
530 
     | 
    
         
             
              def getPDFText( hocr,pheight,xscale,yscale,encodings )
         
     | 
| 
       446 
531 
     | 
    
         
             
                fsize = 10
         
     | 
| 
       447 
     | 
    
         
            -
                cur_enc =  
     | 
| 
       448 
     | 
    
         
            -
                ret = " BT 3 Tr  
     | 
| 
      
 532 
     | 
    
         
            +
                cur_enc = nil
         
     | 
| 
      
 533 
     | 
    
         
            +
                ret = " BT 3 Tr "
         
     | 
| 
       449 
534 
     | 
    
         | 
| 
       450 
535 
     | 
    
         
             
                charset = 'utf-8'
         
     | 
| 
       451 
536 
     | 
    
         
             
                hocr.search("//meta[@http-equiv='Content-Type']").each do |el|
         
     | 
| 
         @@ -455,71 +540,107 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       455 
540 
     | 
    
         
             
                end
         
     | 
| 
       456 
541 
     | 
    
         | 
| 
       457 
542 
     | 
    
         
             
                hocr.search("//span[@class='ocr_line']").each do |line|
         
     | 
| 
       458 
     | 
    
         
            -
                   
     | 
| 
       459 
     | 
    
         
            -
                   
     | 
| 
       460 
     | 
    
         
            -
             
     | 
| 
       461 
     | 
    
         
            -
                   
     | 
| 
       462 
     | 
    
         
            -
             
     | 
| 
      
 543 
     | 
    
         
            +
                  lbbox = elementCoordinates( line,xscale,yscale )
         
     | 
| 
      
 544 
     | 
    
         
            +
                  next if lbbox[2] - lbbox[0] <= 0 or lbbox[3] - lbbox[1] <= 0
         
     | 
| 
      
 545 
     | 
    
         
            +
                  units = getOCRUnits( line,lbbox,fsize,charset,xscale,yscale )
         
     | 
| 
      
 546 
     | 
    
         
            +
                  next if units.length == 0
         
     | 
| 
      
 547 
     | 
    
         
            +
             
     | 
| 
      
 548 
     | 
    
         
            +
                  wwidth = 0
         
     | 
| 
      
 549 
     | 
    
         
            +
                  ltxt = ''
         
     | 
| 
      
 550 
     | 
    
         
            +
                  units.each do |unit|
         
     | 
| 
      
 551 
     | 
    
         
            +
                    ltxt << unit[0]
         
     | 
| 
      
 552 
     | 
    
         
            +
                    wwidth += ( unit[1][2] - unit[1][0] )
         
     | 
| 
       463 
553 
     | 
    
         
             
                  end
         
     | 
| 
       464 
     | 
    
         
            -
                   
     | 
| 
       465 
     | 
    
         
            -
                   
     | 
| 
       466 
     | 
    
         
            -
                   
     | 
| 
      
 554 
     | 
    
         
            +
                  ratio = wwidth / @fdata.getLineWidth( ltxt,fsize )
         
     | 
| 
      
 555 
     | 
    
         
            +
                  pos = lbbox[0]
         
     | 
| 
      
 556 
     | 
    
         
            +
                  posdiff = 0
         
     | 
| 
       467 
557 
     | 
    
         | 
| 
       468 
     | 
    
         
            -
                  bbox = elementCoordinates( line,xscale,yscale )
         
     | 
| 
       469 
     | 
    
         
            -
                  ratio = ( bbox[2] - bbox[0] ) / @fdata.getLineWidth( txt,fsize )
         
     | 
| 
       470 
558 
     | 
    
         
             
                  ret << sprintf( "%f %f %f %f %f %f Tm ",
         
     | 
| 
       471 
     | 
    
         
            -
                    ratio, 0, 0, ratio,  
     | 
| 
       472 
     | 
    
         
            -
             
     | 
| 
       473 
     | 
    
         
            -
                  txt8 = ''
         
     | 
| 
       474 
     | 
    
         
            -
                  txt.each_char do |char|
         
     | 
| 
       475 
     | 
    
         
            -
                    begin
         
     | 
| 
       476 
     | 
    
         
            -
                      Iconv.iconv( "utf-16be","utf-8",char )
         
     | 
| 
       477 
     | 
    
         
            -
                    rescue
         
     | 
| 
       478 
     | 
    
         
            -
                      rawbytes = char.unpack( 'C*' )
         
     | 
| 
       479 
     | 
    
         
            -
                      bs = ''
         
     | 
| 
       480 
     | 
    
         
            -
                      rawbytes.each{ |b| bs << sprintf( "%02x",b ) }
         
     | 
| 
       481 
     | 
    
         
            -
                      $stderr.puts( "Warning: an invalid UTF-8 sequence (#{bs}) in the hOCR data." )
         
     | 
| 
       482 
     | 
    
         
            -
                      char = '?' * rawbytes.length
         
     | 
| 
       483 
     | 
    
         
            -
                    end
         
     | 
| 
      
 559 
     | 
    
         
            +
                    ratio, 0, 0, ratio, lbbox[0], pheight - lbbox[3] - @fdata.header['Descent'] * fsize / 1000.0 * ratio)
         
     | 
| 
      
 560 
     | 
    
         
            +
                  in_txt = false
         
     | 
| 
       484 
561 
     | 
    
         | 
| 
       485 
     | 
    
         
            -
             
     | 
| 
       486 
     | 
    
         
            -
                     
     | 
| 
       487 
     | 
    
         
            -
             
     | 
| 
       488 
     | 
    
         
            -
             
     | 
| 
       489 
     | 
    
         
            -
             
     | 
| 
       490 
     | 
    
         
            -
             
     | 
| 
       491 
     | 
    
         
            -
             
     | 
| 
       492 
     | 
    
         
            -
             
     | 
| 
       493 
     | 
    
         
            -
             
     | 
| 
       494 
     | 
    
         
            -
             
     | 
| 
       495 
     | 
    
         
            -
             
     | 
| 
       496 
     | 
    
         
            -
             
     | 
| 
       497 
     | 
    
         
            -
             
     | 
| 
       498 
     | 
    
         
            -
                         
     | 
| 
      
 562 
     | 
    
         
            +
                  units.each_index do |i|
         
     | 
| 
      
 563 
     | 
    
         
            +
                    unit = units[i]
         
     | 
| 
      
 564 
     | 
    
         
            +
                    wtxt = unit[0]
         
     | 
| 
      
 565 
     | 
    
         
            +
                    bbox = unit[1]
         
     | 
| 
      
 566 
     | 
    
         
            +
             
     | 
| 
      
 567 
     | 
    
         
            +
                    posdiff = ( (pos - bbox[0]) * 1000 / fsize / ratio ).to_i if i > 0
         
     | 
| 
      
 568 
     | 
    
         
            +
                    pos = bbox[0] + ( @fdata.getLineWidth( wtxt,fsize ) * ratio )
         
     | 
| 
      
 569 
     | 
    
         
            +
             
     | 
| 
      
 570 
     | 
    
         
            +
                    txt8 = ''
         
     | 
| 
      
 571 
     | 
    
         
            +
                    wtxt.each_char do |char|
         
     | 
| 
      
 572 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 573 
     | 
    
         
            +
                        Iconv.iconv( "utf-16be","utf-8",char )
         
     | 
| 
      
 574 
     | 
    
         
            +
                      rescue
         
     | 
| 
      
 575 
     | 
    
         
            +
                        rawbytes = char.unpack( 'C*' )
         
     | 
| 
      
 576 
     | 
    
         
            +
                        bs = ''
         
     | 
| 
      
 577 
     | 
    
         
            +
                        rawbytes.each{ |b| bs << sprintf( "%02x",b ) }
         
     | 
| 
      
 578 
     | 
    
         
            +
                        $stderr.puts( "Warning: an invalid UTF-8 sequence (#{bs}) in the hOCR data." )
         
     | 
| 
      
 579 
     | 
    
         
            +
                        char = '?' * rawbytes.length
         
     | 
| 
       499 
580 
     | 
    
         
             
                      end
         
     | 
| 
       500 
581 
     | 
    
         | 
| 
       501 
     | 
    
         
            -
                       
     | 
| 
       502 
     | 
    
         
            -
             
     | 
| 
       503 
     | 
    
         
            -
                         
     | 
| 
       504 
     | 
    
         
            -
                           
     | 
| 
       505 
     | 
    
         
            -
             
     | 
| 
       506 
     | 
    
         
            -
             
     | 
| 
       507 
     | 
    
         
            -
                           
     | 
| 
      
 582 
     | 
    
         
            +
                      encoded = false
         
     | 
| 
      
 583 
     | 
    
         
            +
                      if cur_enc.nil? or not cur_enc.include? char
         
     | 
| 
      
 584 
     | 
    
         
            +
                        encodings.each_index do |i|
         
     | 
| 
      
 585 
     | 
    
         
            +
                          enc = encodings[i]
         
     | 
| 
      
 586 
     | 
    
         
            +
                          next if enc == cur_enc
         
     | 
| 
      
 587 
     | 
    
         
            +
             
     | 
| 
      
 588 
     | 
    
         
            +
                          if enc.include? char
         
     | 
| 
      
 589 
     | 
    
         
            +
                            if in_txt
         
     | 
| 
      
 590 
     | 
    
         
            +
                              ret << "#{posdiff} " if posdiff != 0
         
     | 
| 
      
 591 
     | 
    
         
            +
                              ret << "<#{txt8}> " unless txt8.eql? ''
         
     | 
| 
      
 592 
     | 
    
         
            +
                              ret << "] TJ "
         
     | 
| 
      
 593 
     | 
    
         
            +
                            end
         
     | 
| 
      
 594 
     | 
    
         
            +
                            cur_enc = enc
         
     | 
| 
      
 595 
     | 
    
         
            +
                            ret << "/Fnt#{i + 1} #{fsize} Tf "
         
     | 
| 
      
 596 
     | 
    
         
            +
                            txt8 = ''
         
     | 
| 
      
 597 
     | 
    
         
            +
                            posdiff = 0
         
     | 
| 
      
 598 
     | 
    
         
            +
                            encoded = true
         
     | 
| 
      
 599 
     | 
    
         
            +
                            in_txt = false
         
     | 
| 
      
 600 
     | 
    
         
            +
                            break
         
     | 
| 
      
 601 
     | 
    
         
            +
                          end
         
     | 
| 
       508 
602 
     | 
    
         
             
                        end
         
     | 
| 
       509 
603 
     | 
    
         | 
| 
       510 
     | 
    
         
            -
                         
     | 
| 
       511 
     | 
    
         
            -
                           
     | 
| 
       512 
     | 
    
         
            -
                           
     | 
| 
       513 
     | 
    
         
            -
             
     | 
| 
       514 
     | 
    
         
            -
                           
     | 
| 
      
 604 
     | 
    
         
            +
                        unless encoded
         
     | 
| 
      
 605 
     | 
    
         
            +
                          last = encodings[-1]
         
     | 
| 
      
 606 
     | 
    
         
            +
                          if last.length < 256
         
     | 
| 
      
 607 
     | 
    
         
            +
                            last << char
         
     | 
| 
      
 608 
     | 
    
         
            +
                          else
         
     | 
| 
      
 609 
     | 
    
         
            +
                            last = [ ' ',char ]
         
     | 
| 
      
 610 
     | 
    
         
            +
                            encodings << last
         
     | 
| 
      
 611 
     | 
    
         
            +
                          end
         
     | 
| 
      
 612 
     | 
    
         
            +
             
     | 
| 
      
 613 
     | 
    
         
            +
                          if cur_enc != last
         
     | 
| 
      
 614 
     | 
    
         
            +
                            if in_txt
         
     | 
| 
      
 615 
     | 
    
         
            +
                              ret << "#{posdiff} " if posdiff != 0
         
     | 
| 
      
 616 
     | 
    
         
            +
                              ret << "<#{txt8}> " unless txt8.eql? ''
         
     | 
| 
      
 617 
     | 
    
         
            +
                              ret << "] TJ "
         
     | 
| 
      
 618 
     | 
    
         
            +
                            end
         
     | 
| 
      
 619 
     | 
    
         
            +
                            cur_enc = last
         
     | 
| 
      
 620 
     | 
    
         
            +
                            ret << "/Fnt#{encodings.length} #{fsize} Tf "
         
     | 
| 
      
 621 
     | 
    
         
            +
                            txt8 = ''
         
     | 
| 
      
 622 
     | 
    
         
            +
                            posdiff = 0
         
     | 
| 
      
 623 
     | 
    
         
            +
                            in_txt = false
         
     | 
| 
      
 624 
     | 
    
         
            +
                          end
         
     | 
| 
       515 
625 
     | 
    
         
             
                        end
         
     | 
| 
       516 
626 
     | 
    
         
             
                      end
         
     | 
| 
      
 627 
     | 
    
         
            +
             
     | 
| 
      
 628 
     | 
    
         
            +
                      unless in_txt
         
     | 
| 
      
 629 
     | 
    
         
            +
                        ret << "[ "
         
     | 
| 
      
 630 
     | 
    
         
            +
                        in_txt = true
         
     | 
| 
      
 631 
     | 
    
         
            +
                      end
         
     | 
| 
      
 632 
     | 
    
         
            +
                      txt8 << sprintf( "%02X",cur_enc.index(char) )
         
     | 
| 
       517 
633 
     | 
    
         
             
                    end
         
     | 
| 
       518 
634 
     | 
    
         | 
| 
       519 
     | 
    
         
            -
                    txt8 
     | 
| 
      
 635 
     | 
    
         
            +
                    unless txt8.eql? ''
         
     | 
| 
      
 636 
     | 
    
         
            +
                      ret << "#{posdiff} " if posdiff != 0
         
     | 
| 
      
 637 
     | 
    
         
            +
                      ret << "<#{txt8}> "
         
     | 
| 
      
 638 
     | 
    
         
            +
                    end
         
     | 
| 
      
 639 
     | 
    
         
            +
                  end
         
     | 
| 
      
 640 
     | 
    
         
            +
                  if in_txt
         
     | 
| 
      
 641 
     | 
    
         
            +
                    ret << "] TJ "
         
     | 
| 
      
 642 
     | 
    
         
            +
                    in_txt = false
         
     | 
| 
       520 
643 
     | 
    
         
             
                  end
         
     | 
| 
       521 
     | 
    
         
            -
             
     | 
| 
       522 
     | 
    
         
            -
                  ret << "<#{txt8}> Tj " unless txt8.eql? ''
         
     | 
| 
       523 
644 
     | 
    
         
             
                end
         
     | 
| 
       524 
645 
     | 
    
         | 
| 
       525 
646 
     | 
    
         
             
                ret << "ET "
         
     | 
    
        data/lib/pdfbeads/pdffont.rb
    CHANGED
    
    | 
         @@ -266,7 +266,75 @@ class PDFBeads::PDFBuilder::FontDataProvider 
     | 
|
| 
       266 
266 
     | 
    
         
             
                  0x02DB => ["/ogonek", 333],
         
     | 
| 
       267 
267 
     | 
    
         
             
                  0x02DC => ["/tilde", 333],
         
     | 
| 
       268 
268 
     | 
    
         
             
                  0x02DD => ["/hungarumlaut", 333],
         
     | 
| 
      
 269 
     | 
    
         
            +
                  0x0338 => ["/Alphatonos", 722],
         
     | 
| 
      
 270 
     | 
    
         
            +
                  0x0388 => ["/Epsilontonos", 694],
         
     | 
| 
      
 271 
     | 
    
         
            +
                  0x0389 => ["/Etatonos", 808],
         
     | 
| 
      
 272 
     | 
    
         
            +
                  0x038A => ["/Iotatonos", 412],
         
     | 
| 
      
 273 
     | 
    
         
            +
                  0x038C => ["/Omicrontonos", 722],
         
     | 
| 
      
 274 
     | 
    
         
            +
                  0x038E => ["/Upsilontonos", 816],
         
     | 
| 
      
 275 
     | 
    
         
            +
                  0x038F => ["/Omegatonos", 744],
         
     | 
| 
      
 276 
     | 
    
         
            +
                  0x03AC => ["/alphatonos", 522],
         
     | 
| 
      
 277 
     | 
    
         
            +
                  0x03AD => ["/epsilontonos", 420],
         
     | 
| 
      
 278 
     | 
    
         
            +
                  0x03AE => ["/etatonos", 522],
         
     | 
| 
      
 279 
     | 
    
         
            +
                  0x03AF => ["/iotatonos", 268],
         
     | 
| 
      
 280 
     | 
    
         
            +
                  0x0390 => ["/iotadieresistonos", 268],
         
     | 
| 
      
 281 
     | 
    
         
            +
                  0x0391 => ["/Alpha", 722],
         
     | 
| 
      
 282 
     | 
    
         
            +
                  0x0392 => ["/Beta", 667],
         
     | 
| 
      
 283 
     | 
    
         
            +
                  0x0393 => ["/Gamma", 578],
         
     | 
| 
       269 
284 
     | 
    
         
             
                  0x0394 => ["/Delta", 643],
         
     | 
| 
      
 285 
     | 
    
         
            +
                  0x0395 => ["/Epsilon", 611],
         
     | 
| 
      
 286 
     | 
    
         
            +
                  0x0396 => ["/Zeta", 611],
         
     | 
| 
      
 287 
     | 
    
         
            +
                  0x0397 => ["/Eta", 722],
         
     | 
| 
      
 288 
     | 
    
         
            +
                  0x0398 => ["/Theta", 722],
         
     | 
| 
      
 289 
     | 
    
         
            +
                  0x0399 => ["/Iota", 333],
         
     | 
| 
      
 290 
     | 
    
         
            +
                  0x039A => ["/Kappa", 722],
         
     | 
| 
      
 291 
     | 
    
         
            +
                  0x039B => ["/Lambda", 724],
         
     | 
| 
      
 292 
     | 
    
         
            +
                  0x039C => ["/Mu", 889],
         
     | 
| 
      
 293 
     | 
    
         
            +
                  0x039D => ["/Nu", 722],
         
     | 
| 
      
 294 
     | 
    
         
            +
                  0x039E => ["/Xi", 643],
         
     | 
| 
      
 295 
     | 
    
         
            +
                  0x039F => ["/Omicron", 722],
         
     | 
| 
      
 296 
     | 
    
         
            +
                  0x03A0 => ["/Pi", 722],
         
     | 
| 
      
 297 
     | 
    
         
            +
                  0x03A1 => ["/Rho", 556],
         
     | 
| 
      
 298 
     | 
    
         
            +
                  0x03A3 => ["/Sigma", 582],
         
     | 
| 
      
 299 
     | 
    
         
            +
                  0x03A4 => ["/Tau", 611],
         
     | 
| 
      
 300 
     | 
    
         
            +
                  0x03A5 => ["/Upsilon", 722],
         
     | 
| 
      
 301 
     | 
    
         
            +
                  0x03A6 => ["/Phi", 730],
         
     | 
| 
      
 302 
     | 
    
         
            +
                  0x03A7 => ["/Chi", 722],
         
     | 
| 
      
 303 
     | 
    
         
            +
                  0x03A8 => ["/Psi", 737],
         
     | 
| 
      
 304 
     | 
    
         
            +
                  0x03A9 => ["/Omega", 744],
         
     | 
| 
      
 305 
     | 
    
         
            +
                  0x03AA => ["/Iotadieresis", 333],
         
     | 
| 
      
 306 
     | 
    
         
            +
                  0x03AB => ["/Upsilondieresis", 722],
         
     | 
| 
      
 307 
     | 
    
         
            +
                  0x03B0 => ["/upsilondieresistonos", 496],
         
     | 
| 
      
 308 
     | 
    
         
            +
                  0x03B1 => ["/alpha", 522],
         
     | 
| 
      
 309 
     | 
    
         
            +
                  0x03B2 => ["/beta", 508],
         
     | 
| 
      
 310 
     | 
    
         
            +
                  0x03B3 => ["/gamma", 440],
         
     | 
| 
      
 311 
     | 
    
         
            +
                  0x03B4 => ["/delta", 471],
         
     | 
| 
      
 312 
     | 
    
         
            +
                  0x03B5 => ["/epsilon", 420],
         
     | 
| 
      
 313 
     | 
    
         
            +
                  0x03B6 => ["/zeta", 414],
         
     | 
| 
      
 314 
     | 
    
         
            +
                  0x03B7 => ["/eta", 522],
         
     | 
| 
      
 315 
     | 
    
         
            +
                  0x03B8 => ["/theta", 480],
         
     | 
| 
      
 316 
     | 
    
         
            +
                  0x03B9 => ["/iota", 268],
         
     | 
| 
      
 317 
     | 
    
         
            +
                  0x03BA => ["/kappa", 502],
         
     | 
| 
      
 318 
     | 
    
         
            +
                  0x03BB => ["/lambda", 484],
         
     | 
| 
      
 319 
     | 
    
         
            +
                  0x03BC => ["/mu", 500],
         
     | 
| 
      
 320 
     | 
    
         
            +
                  0x03BD => ["/nu", 452],
         
     | 
| 
      
 321 
     | 
    
         
            +
                  0x03BE => ["/xi", 444],
         
     | 
| 
      
 322 
     | 
    
         
            +
                  0x03BF => ["/omicron", 500],
         
     | 
| 
      
 323 
     | 
    
         
            +
                  0x03C0 => ["/pi", 504],
         
     | 
| 
      
 324 
     | 
    
         
            +
                  0x03C1 => ["/rho", 500],
         
     | 
| 
      
 325 
     | 
    
         
            +
                  0x03C2 => ["/sigma1", 396],
         
     | 
| 
      
 326 
     | 
    
         
            +
                  0x03C3 => ["/sigma", 540],
         
     | 
| 
      
 327 
     | 
    
         
            +
                  0x03C4 => ["/tau", 400],
         
     | 
| 
      
 328 
     | 
    
         
            +
                  0x03C5 => ["/upsilon", 496],
         
     | 
| 
      
 329 
     | 
    
         
            +
                  0x03C6 => ["/phi", 578],
         
     | 
| 
      
 330 
     | 
    
         
            +
                  0x03C7 => ["/chi", 444],
         
     | 
| 
      
 331 
     | 
    
         
            +
                  0x03C8 => ["/psi", 624],
         
     | 
| 
      
 332 
     | 
    
         
            +
                  0x03C9 => ["/omega", 658],
         
     | 
| 
      
 333 
     | 
    
         
            +
                  0x03CA => ["/iotadieresis", 268],
         
     | 
| 
      
 334 
     | 
    
         
            +
                  0x03CB => ["/upsilondieresis", 496],
         
     | 
| 
      
 335 
     | 
    
         
            +
                  0x03CC => ["/omicrontonos", 500],
         
     | 
| 
      
 336 
     | 
    
         
            +
                  0x03CD => ["/upsilontonos", 496],
         
     | 
| 
      
 337 
     | 
    
         
            +
                  0x03CE => ["/omegatonos", 658],
         
     | 
| 
       270 
338 
     | 
    
         
             
                  0x0401 => ["/afii10023", 611],
         
     | 
| 
       271 
339 
     | 
    
         
             
                  0x0402 => ["/afii10051", 752],
         
     | 
| 
       272 
340 
     | 
    
         
             
                  0x0403 => ["/afii10052", 578],
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: pdfbeads
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              hash:  
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 25
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
              segments: 
         
     | 
| 
       7 
7 
     | 
    
         
             
              - 1
         
     | 
| 
       8 
8 
     | 
    
         
             
              - 0
         
     | 
| 
       9 
     | 
    
         
            -
              - 5
     | 
| 
       10 
     | 
    
         
            -
              version: 1.0.5
     | 
| 
      
 9 
     | 
    
         
            +
              - 7
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 1.0.7
         
     | 
| 
       11 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       12 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       13 
13 
     | 
    
         
             
            - Alexey Kryukov
         
     | 
| 
         @@ -15,7 +15,7 @@ autorequire: 
     | 
|
| 
       15 
15 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       16 
16 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
            date: 2012-02- 
     | 
| 
      
 18 
     | 
    
         
            +
            date: 2012-02-10 00:00:00 +04:00
         
     | 
| 
       19 
19 
     | 
    
         
             
            default_executable: pdfbeads
         
     | 
| 
       20 
20 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       21 
21 
     | 
    
         |