asposewordsjavaforruby 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gempackage +2 -2
- data/LICENSE +20 -20
- data/README.md +28 -2
- data/Rakefile +2 -2
- data/asposewordsjavaforruby.gemspec +27 -27
- data/config/aspose.yml +5 -5
- data/data/LoadTxt.txt +14 -14
- data/lib/asposewordsjavaforruby.rb +71 -71
- data/lib/asposewordsjavaforruby/addwatermark.rb +84 -84
- data/lib/asposewordsjavaforruby/appenddoc.rb +24 -24
- data/lib/asposewordsjavaforruby/appenddocument.rb +229 -229
- data/lib/asposewordsjavaforruby/applylicense.rb +16 -16
- data/lib/asposewordsjavaforruby/asposewordsjava.rb +23 -23
- data/lib/asposewordsjavaforruby/autofittables.rb +123 -123
- data/lib/asposewordsjavaforruby/bookmarks.rb +132 -132
- data/lib/asposewordsjavaforruby/checkformat.rb +70 -70
- data/lib/asposewordsjavaforruby/compressimages.rb +53 -53
- data/lib/asposewordsjavaforruby/doc2pdf.rb +15 -15
- data/lib/asposewordsjavaforruby/doctohtml.rb +26 -26
- data/lib/asposewordsjavaforruby/extractcontent.rb +395 -395
- data/lib/asposewordsjavaforruby/findandreplace.rb +29 -29
- data/lib/asposewordsjavaforruby/helloworld.rb +26 -26
- data/lib/asposewordsjavaforruby/imagetopdf.rb +71 -71
- data/lib/asposewordsjavaforruby/insertnestedfields.rb +39 -39
- data/lib/asposewordsjavaforruby/loadandsavetodisk.rb +20 -20
- data/lib/asposewordsjavaforruby/loadandsavetostream.rb +32 -32
- data/lib/asposewordsjavaforruby/loadtxt.rb +14 -14
- data/lib/asposewordsjavaforruby/mergefield.rb +45 -45
- data/lib/asposewordsjavaforruby/nodes.rb +29 -29
- data/lib/asposewordsjavaforruby/processcomments.rb +72 -72
- data/lib/asposewordsjavaforruby/removebreaks.rb +65 -65
- data/lib/asposewordsjavaforruby/removefield.rb +23 -23
- data/lib/asposewordsjavaforruby/saveasmultipagetiff.rb +26 -26
- data/lib/asposewordsjavaforruby/simplemailmerge.rb +23 -23
- data/lib/asposewordsjavaforruby/styles.rb +77 -77
- data/lib/asposewordsjavaforruby/updatefields.rb +62 -62
- data/lib/asposewordsjavaforruby/version.rb +3 -3
- metadata +18 -28
@@ -1,70 +1,70 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
module Asposewordsjavaforruby
  # Example mixin that reports the format of every sample document.
  #
  # #initialize points at the bundled 'data/joiningandappending/' directory and
  # hands it to #check_fromat, which prints a description of each file's format
  # and copies the file into 'data/OutSupported/'. Rjb must already be loaded
  # and able to resolve the com.aspose.words classes imported below.
  module CheckFormat
    def initialize()
      # The path to the documents directory.
      data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'

      @supported_dir = data_dir + 'OutSupported/'
      file = Rjb::import("java.io.File").new(data_dir + 'joiningandappending/')

      check_fromat(file)
    end

    # Prints the name, path and detected format of every regular file directly
    # inside +file+, then copies each one into @supported_dir.
    #
    # The misspelt method name is the published one and is therefore kept;
    # #check_format is provided as an alias.
    #
    # @param file [Object] Rjb proxy of a java.io.File directory; entries that
    #   are themselves directories are skipped
    def check_fromat(file)
      files_list = file.listFiles()
      # java.io.File#listFiles answers null when the path is not a readable
      # directory; there is nothing to report in that case.
      return if files_list.nil?

      # These imports do not depend on the current entry, so resolve them once.
      load_format = Rjb::import('com.aspose.words.LoadFormat')
      format_util = Rjb::import('com.aspose.words.FileFormatUtil')
      java_file = Rjb::import("java.io.File")

      files_list.each do |entry|
        next if entry.isDirectory()

        name_only = entry.getName()
        puts name_only
        file_name = entry.getPath()
        puts file_name

        info = format_util.detectFileFormat(file_name)
        case info.getLoadFormat()
        when load_format.DOC
          puts "Microsoft Word 97-2003 document."
        when load_format.DOT
          puts "Microsoft Word 97-2003 template."
        when load_format.DOCX
          puts "Office Open XML WordprocessingML Macro-Free Document."
        when load_format.DOCM
          puts "Office Open XML WordprocessingML Macro-Enabled Document."
        when load_format.DOTX
          puts "Office Open XML WordprocessingML Macro-Free Template."
        when load_format.DOTM
          puts "Office Open XML WordprocessingML Macro-Enabled Template."
        when load_format.FLAT_OPC
          puts "Flat OPC document."
        when load_format.RTF
          puts "RTF format."
        when load_format.WORD_ML
          puts "Microsoft Word 2003 WordprocessingML format."
        when load_format.HTML
          puts "HTML format."
        when load_format.MHTML
          puts "MHTML (Web archive) format."
        when load_format.ODT
          puts "OpenDocument Text."
        when load_format.OTT
          puts "OpenDocument Text Template."
        when load_format.DOC_PRE_WORD_97
          puts "MS Word 6 or Word 95 format."
        else
          # Fallback for LoadFormat.UNKNOWN and any value not listed above.
          puts "Unknown format."
        end

        # FileUtils.cp fails when the target directory is missing, so create
        # it on demand before copying.
        FileUtils.mkdir_p(@supported_dir)
        dest_file = java_file.new(@supported_dir + name_only).getPath()
        FileUtils.cp(file_name, dest_file)
      end
    end

    # Correctly spelt, backward-compatible alias for #check_fromat.
    alias_method :check_format, :check_fromat
  end
end
|
1
|
+
require 'fileutils'
|
2
|
+
module Asposewordsjavaforruby
  # Example mixin that reports the format of every sample document.
  #
  # #initialize points at the bundled 'data/joiningandappending/' directory and
  # hands it to #check_fromat, which prints a description of each file's format
  # and copies the file into 'data/OutSupported/'. Rjb must already be loaded
  # and able to resolve the com.aspose.words classes imported below.
  module CheckFormat
    def initialize()
      # The path to the documents directory.
      data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'

      @supported_dir = data_dir + 'OutSupported/'
      file = Rjb::import("java.io.File").new(data_dir + 'joiningandappending/')

      check_fromat(file)
    end

    # Prints the name, path and detected format of every regular file directly
    # inside +file+, then copies each one into @supported_dir.
    #
    # The misspelt method name is the published one and is therefore kept;
    # #check_format is provided as an alias.
    #
    # @param file [Object] Rjb proxy of a java.io.File directory; entries that
    #   are themselves directories are skipped
    def check_fromat(file)
      files_list = file.listFiles()
      # java.io.File#listFiles answers null when the path is not a readable
      # directory; there is nothing to report in that case.
      return if files_list.nil?

      # These imports do not depend on the current entry, so resolve them once.
      load_format = Rjb::import('com.aspose.words.LoadFormat')
      format_util = Rjb::import('com.aspose.words.FileFormatUtil')
      java_file = Rjb::import("java.io.File")

      files_list.each do |entry|
        next if entry.isDirectory()

        name_only = entry.getName()
        puts name_only
        file_name = entry.getPath()
        puts file_name

        info = format_util.detectFileFormat(file_name)
        case info.getLoadFormat()
        when load_format.DOC
          puts "Microsoft Word 97-2003 document."
        when load_format.DOT
          puts "Microsoft Word 97-2003 template."
        when load_format.DOCX
          puts "Office Open XML WordprocessingML Macro-Free Document."
        when load_format.DOCM
          puts "Office Open XML WordprocessingML Macro-Enabled Document."
        when load_format.DOTX
          puts "Office Open XML WordprocessingML Macro-Free Template."
        when load_format.DOTM
          puts "Office Open XML WordprocessingML Macro-Enabled Template."
        when load_format.FLAT_OPC
          puts "Flat OPC document."
        when load_format.RTF
          puts "RTF format."
        when load_format.WORD_ML
          puts "Microsoft Word 2003 WordprocessingML format."
        when load_format.HTML
          puts "HTML format."
        when load_format.MHTML
          puts "MHTML (Web archive) format."
        when load_format.ODT
          puts "OpenDocument Text."
        when load_format.OTT
          puts "OpenDocument Text Template."
        when load_format.DOC_PRE_WORD_97
          puts "MS Word 6 or Word 95 format."
        else
          # Fallback for LoadFormat.UNKNOWN and any value not listed above.
          puts "Unknown format."
        end

        # FileUtils.cp fails when the target directory is missing, so create
        # it on demand before copying.
        FileUtils.mkdir_p(@supported_dir)
        dest_file = java_file.new(@supported_dir + name_only).getPath()
        FileUtils.cp(file_name, dest_file)
      end
    end

    # Correctly spelt, backward-compatible alias for #check_fromat.
    alias_method :check_format, :check_fromat
  end
end
|
@@ -1,53 +1,53 @@
|
|
1
|
-
module Asposewordsjavaforruby
  # Example mixin that resamples the images of 'TestCompressImages.docx',
  # saves the result as 'TestCompressImages Out.docx' and checks the
  # resolution of the first image in the saved copy.
  #
  # The work is started from #initialize. Rjb must already be loaded and able
  # to resolve the Java classes imported below.
  module CompressImages
    def initialize()
      # The path to the documents directory.
      @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
      srcFileName = @data_dir + "TestCompressImages.docx"

      doc = Rjb::import('com.aspose.words.Document').new(srcFileName)

      # Resample the document's images and save the compressed copy.
      compress_images(doc, srcFileName)
    end

    # Resamples the images in +doc+, saves the document into @data_dir and
    # prints what was done.
    #
    # @param doc [Object] Rjb proxy of the com.aspose.words.Document to compress
    # @param srcFileName [String] path +doc+ was loaded from; accepted for
    #   interface compatibility, not used by the current implementation
    def compress_images(doc, srcFileName)
      messageFormat = Rjb::import("java.text.MessageFormat")

      # 220ppi Print - said to be excellent on most printers and screens.
      # 150ppi Screen - said to be good for web pages and projectors.
      # 96ppi Email - said to be good for minimal document size and sharing.
      desiredPpi = 150
      # In Java this seems to be a good compression / quality setting.
      jpegQuality = 90

      # Resample images to desired ppi and save.
      resampler = Rjb::import("com.aspose.words.Resampler").new
      count = resampler.resample(doc, desiredPpi, jpegQuality)
      # Use the imported proxy held in the local; there is no Ruby constant
      # named MessageFormat in scope.
      puts messageFormat.format("Resampled {0} images.", count)
      if count != 1
        puts "We expected to have only 1 image resampled in this test document!"
      end
      dstFileName = @data_dir + "TestCompressImages Out.docx"
      doc.save(dstFileName)
      puts messageFormat.format("Saving {0}. Size {1}.", dstFileName, get_file_size(dstFileName))

      # Verify that the first image was compressed by checking the new Ppi.
      dst_doc = Rjb::import("com.aspose.words.Document").new(dstFileName)
      nodeType = Rjb::import("com.aspose.words.NodeType")
      shape = dst_doc.getChild(nodeType.DRAWING_ML, 0, true)
      convertUtil = Rjb::import("com.aspose.words.ConvertUtil")
      imagePpi = shape.getImageData().getImageSize().getWidthPixels() / convertUtil.pointToInch(shape.getSize().getX())
      # Compare against the requested resolution rather than a second literal.
      if imagePpi < desiredPpi
        puts "Image was not resampled successfully."
      end
    end

    # Size of the file at +file_name+ as reported by java.io.File#length.
    #
    # @param file_name [String] path of the file to measure
    def get_file_size(file_name)
      Rjb::import("java.io.File").new(file_name).length()
    end
  end
end
|
1
|
+
module Asposewordsjavaforruby
  # Example mixin that resamples the images of 'TestCompressImages.docx',
  # saves the result as 'TestCompressImages Out.docx' and checks the
  # resolution of the first image in the saved copy.
  #
  # The work is started from #initialize. Rjb must already be loaded and able
  # to resolve the Java classes imported below.
  module CompressImages
    def initialize()
      # The path to the documents directory.
      @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
      srcFileName = @data_dir + "TestCompressImages.docx"

      doc = Rjb::import('com.aspose.words.Document').new(srcFileName)

      # Resample the document's images and save the compressed copy.
      compress_images(doc, srcFileName)
    end

    # Resamples the images in +doc+, saves the document into @data_dir and
    # prints what was done.
    #
    # @param doc [Object] Rjb proxy of the com.aspose.words.Document to compress
    # @param srcFileName [String] path +doc+ was loaded from; accepted for
    #   interface compatibility, not used by the current implementation
    def compress_images(doc, srcFileName)
      messageFormat = Rjb::import("java.text.MessageFormat")

      # 220ppi Print - said to be excellent on most printers and screens.
      # 150ppi Screen - said to be good for web pages and projectors.
      # 96ppi Email - said to be good for minimal document size and sharing.
      desiredPpi = 150
      # In Java this seems to be a good compression / quality setting.
      jpegQuality = 90

      # Resample images to desired ppi and save.
      resampler = Rjb::import("com.aspose.words.Resampler").new
      count = resampler.resample(doc, desiredPpi, jpegQuality)
      # Use the imported proxy held in the local; there is no Ruby constant
      # named MessageFormat in scope.
      puts messageFormat.format("Resampled {0} images.", count)
      if count != 1
        puts "We expected to have only 1 image resampled in this test document!"
      end
      dstFileName = @data_dir + "TestCompressImages Out.docx"
      doc.save(dstFileName)
      puts messageFormat.format("Saving {0}. Size {1}.", dstFileName, get_file_size(dstFileName))

      # Verify that the first image was compressed by checking the new Ppi.
      dst_doc = Rjb::import("com.aspose.words.Document").new(dstFileName)
      nodeType = Rjb::import("com.aspose.words.NodeType")
      shape = dst_doc.getChild(nodeType.DRAWING_ML, 0, true)
      convertUtil = Rjb::import("com.aspose.words.ConvertUtil")
      imagePpi = shape.getImageData().getImageSize().getWidthPixels() / convertUtil.pointToInch(shape.getSize().getX())
      # Compare against the requested resolution rather than a second literal.
      if imagePpi < desiredPpi
        puts "Image was not resampled successfully."
      end
    end

    # Size of the file at +file_name+ as reported by java.io.File#length.
    #
    # @param file_name [String] path of the file to measure
    def get_file_size(file_name)
      Rjb::import("java.io.File").new(file_name).length()
    end
  end
end
|
@@ -1,15 +1,15 @@
|
|
1
|
-
module Asposewordsjavaforruby
  # Example mixin that converts the bundled Word template to PDF.
  module Doc2Pdf
    # Opens 'Template.doc' from the data directory three levels above this
    # file and saves it there again as 'Doc2PdfSave Out.pdf'.
    def doc_to_pdf
      # Climb three directory levels from this file to reach the gem root.
      gem_root = 3.times.inject(__FILE__) { |path, _| File.dirname(path) }
      data_dir = gem_root + '/data/'

      # Open the source document, then write it back out as PDF.
      template = Rjb::import('com.aspose.words.Document').new(data_dir + "Template.doc")
      template.save(data_dir + "Doc2PdfSave Out.pdf")
    end
  end
end
|
1
|
+
module Asposewordsjavaforruby
  # Example mixin that converts the bundled Word template to PDF.
  module Doc2Pdf
    # Opens 'Template.doc' from the data directory three levels above this
    # file and saves it there again as 'Doc2PdfSave Out.pdf'.
    def doc_to_pdf
      # Climb three directory levels from this file to reach the gem root.
      gem_root = 3.times.inject(__FILE__) { |path, _| File.dirname(path) }
      data_dir = gem_root + '/data/'

      # Open the source document, then write it back out as PDF.
      template = Rjb::import('com.aspose.words.Document').new(data_dir + "Template.doc")
      template.save(data_dir + "Doc2PdfSave Out.pdf")
    end
  end
end
|
@@ -1,26 +1,26 @@
|
|
1
|
-
module Asposewordsjavaforruby
  # Example mixin that round-trips 'TestFile.doc' through HTML: the document
  # is exported to HTML with round-trip information, the HTML is loaded back
  # and the result is saved as DOCX. Everything runs from #initialize.
  module DocToHTML
    def initialize
      # Directory holding the sample documents.
      gem_root = 3.times.inject(__FILE__) { |path, _| File.dirname(path) }
      data_dir = gem_root + '/data/'
      html_path = data_dir + "ExportRoundtripInformation Out.html"

      # Load the source document.
      source = Rjb::import('com.aspose.words.Document').new(data_dir + "TestFile.doc")

      # ExportRoundtripInformation controls whether round-trip information is
      # written when saving to HTML, MHTML or EPUB. It defaults to true for
      # HTML and false for MHTML and EPUB; it is switched on explicitly here.
      html_options = Rjb::import('com.aspose.words.HtmlSaveOptions').new
      html_options.setExportRoundtripInformation(true)
      source.save(html_path, html_options)

      # Read the exported HTML back in and store it in DOCX format.
      reloaded = Rjb::import('com.aspose.words.Document').new(html_path)
      docx = Rjb::import('com.aspose.words.SaveFormat').DOCX
      reloaded.save(data_dir + "Out.docx", docx)
    end
  end
end
|
1
|
+
module Asposewordsjavaforruby
  # Example mixin that round-trips 'TestFile.doc' through HTML: the document
  # is exported to HTML with round-trip information, the HTML is loaded back
  # and the result is saved as DOCX. Everything runs from #initialize.
  module DocToHTML
    def initialize
      # Directory holding the sample documents.
      gem_root = 3.times.inject(__FILE__) { |path, _| File.dirname(path) }
      data_dir = gem_root + '/data/'
      html_path = data_dir + "ExportRoundtripInformation Out.html"

      # Load the source document.
      source = Rjb::import('com.aspose.words.Document').new(data_dir + "TestFile.doc")

      # ExportRoundtripInformation controls whether round-trip information is
      # written when saving to HTML, MHTML or EPUB. It defaults to true for
      # HTML and false for MHTML and EPUB; it is switched on explicitly here.
      html_options = Rjb::import('com.aspose.words.HtmlSaveOptions').new
      html_options.setExportRoundtripInformation(true)
      source.save(html_path, html_options)

      # Read the exported HTML back in and store it in DOCX format.
      reloaded = Rjb::import('com.aspose.words.Document').new(html_path)
      docx = Rjb::import('com.aspose.words.SaveFormat').DOCX
      reloaded.save(data_dir + "Out.docx", docx)
    end
  end
end
|
@@ -1,395 +1,395 @@
|
|
1
|
-
module Asposewordsjavaforruby
|
2
|
-
module ExtractContent
|
3
|
-
# Loads 'TestFile.doc' from the sample document directory and runs every
# extraction example against that one document, in a fixed order.
def initialize
  # Directory the examples read from and write to.
  @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/document/'

  doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestFile.doc")

  # The examples share the same document object, so the order is kept as is.
  %i[
    extract_content_between_paragraphs
    extract_content_between_block_level_nodes
    extract_content_between_paragraph_styles
    extract_content_between_runs
    extract_content_using_field
    extract_content_between_bookmark
    extract_content_between_comment_range
  ].each { |example| send(example, doc) }
end
|
18
|
-
|
19
|
-
# Copies the content between two paragraphs of the first section (markers
# included) into a new document and saves it as 'TestFile.Paragraphs Out.doc'.
#
# @param doc [Object] Rjb proxy of the source com.aspose.words.Document
def extract_content_between_paragraphs(doc)
  paragraph_type = Rjb::import("com.aspose.words.NodeType").PARAGRAPH

  # getChild uses a 0-based index, so these are the 7th and 11th paragraphs.
  first_marker = doc.getFirstSection().getChild(paragraph_type, 6, true)
  last_marker = doc.getFirstSection().getChild(paragraph_type, 10, true)

  # Inclusive extraction: both marker paragraphs are part of the result.
  copied = extract_contents(first_marker, last_marker, true)

  # Put the copies into a fresh document and write it to disk.
  generate_document(doc, copied).save(@data_dir + "TestFile.Paragraphs Out.doc")
end
|
32
|
-
|
33
|
-
# Duplicates the content between a paragraph and a table of the last section
# by inserting copies after the table, then saves the modified document as
# 'TestFile.DuplicatedContent Out.doc'.
#
# @param doc [Object] Rjb proxy of the com.aspose.words.Document; it is modified
def extract_content_between_block_level_nodes(doc)
  node_type = Rjb::import("com.aspose.words.NodeType")

  # getChild uses a 0-based index: third paragraph and first table.
  start_para = doc.getLastSection().getChild(node_type.PARAGRAPH, 2, true)
  end_table = doc.getLastSection().getChild(node_type.TABLE, 0, true)

  # Inclusive extraction: the paragraph and the table are both copied.
  copies = extract_contents(start_para, end_table, true)

  # Reversing the list lets every copy be inserted directly after the table
  # while still ending up in the original order.
  Rjb::import("java.util.Collections").reverse(copies)

  until copies.size().zero?
    # Insert the head of the reversed list, then drop it from the list.
    end_table.getParentNode().insertAfter(copies.get(0), end_table)
    copies.remove(0)
  end

  # Write the document with the duplicated content to disk.
  doc.save(@data_dir + "TestFile.DuplicatedContent Out.doc")
end
|
56
|
-
|
57
|
-
# Copies the content between the first "Heading 1" paragraph and the first
# "Heading 3" paragraph (markers excluded) into a new document and saves it
# as 'TestFile.Styles Out.doc'.
#
# @param doc [Object] Rjb proxy of the source com.aspose.words.Document
def extract_content_between_paragraph_styles(doc)
  # Collect the paragraphs carrying each heading style.
  heading1_paras = paragraphs_by_style_name(doc, "Heading 1")
  heading3_paras = paragraphs_by_style_name(doc, "Heading 3")

  # Only the first paragraph of each style serves as a marker.
  first_heading1 = heading1_paras.get(0)
  first_heading3 = heading3_paras.get(0)

  # Exclusive extraction: the heading paragraphs themselves are left out.
  copied = extract_contents(first_heading1, first_heading3, false)

  # Put the copies into a fresh document and write it to disk.
  generate_document(doc, copied).save(@data_dir + "TestFile.Styles Out.doc")
end
|
73
|
-
|
74
|
-
# Extracts the content between two runs of one paragraph (markers included)
# and prints the text of the first extracted node to the console.
#
# @param doc [Object] Rjb proxy of the source com.aspose.words.Document
def extract_content_between_runs(doc)
  # getChild uses a 0-based index, so this is the 8th paragraph of the document.
  paragraph_type = Rjb::import("com.aspose.words.NodeType").PARAGRAPH
  para = doc.getChild(paragraph_type, 7, true)

  # The 2nd and 5th runs of that paragraph act as markers.
  first_run = para.getRuns().get(1)
  last_run = para.getRuns().get(4)

  # Inclusive extraction: both marker runs are part of the result.
  copied = extract_contents(first_run, last_run, true)

  # The original example expects a single paragraph in the list; print it as text.
  text_format = Rjb::import("com.aspose.words.SaveFormat").TEXT
  puts copied.get(0).toString(text_format)
end
|
93
|
-
|
94
|
-
# Copies the content between the "Fullname" merge field and a paragraph of
# the first section (markers excluded) into a new document and saves it as
# 'TestFile.Fields Out.doc'.
#
# @param doc [Object] Rjb proxy of the source com.aspose.words.Document
def extract_content_using_field(doc)
  # A DocumentBuilder is used to locate the merge field.
  builder = Rjb::import("com.aspose.words.DocumentBuilder").new(doc)

  # Move the builder cursor to the field; the node under the cursor becomes
  # the start marker. The field start could also be found with getChild, as
  # in the other examples.
  builder.moveToMergeField("Fullname", false, false)
  field_marker = builder.getCurrentNode()

  # getChild uses a 0-based index, so this is the 6th paragraph.
  paragraph_type = Rjb::import("com.aspose.words.NodeType").PARAGRAPH
  closing_para = doc.getFirstSection().getChild(paragraph_type, 5, true)

  # Exclusive extraction: the markers themselves are left out.
  copied = extract_contents(field_marker, closing_para, false)

  # Put the copies into a fresh document and write it to disk.
  generate_document(doc, copied).save(@data_dir + "TestFile.Fields Out.doc")
end
|
114
|
-
|
115
|
-
# Extracts the content of bookmark "Bookmark1" twice - once including the
# bookmark nodes and once without them - and saves each result as its own
# document.
#
# @param doc [Object] Rjb proxy of the source com.aspose.words.Document
def extract_content_between_bookmark(doc)
  bookmark = doc.getRange().getBookmarks().get("Bookmark1")

  # The bookmark's start and end nodes are the extraction markers.
  opening = bookmark.getBookmarkStart()
  closing = bookmark.getBookmarkEnd()

  # First pass: markers included.
  with_markers = extract_contents(opening, closing, true)
  generate_document(doc, with_markers).save(@data_dir + "TestFile.BookmarkInclusive Out.doc")

  # Second pass: markers excluded.
  without_markers = extract_contents(opening, closing, false)
  generate_document(doc, without_markers).save(@data_dir + "TestFile.BookmarkExclusive Out.doc")
end
|
133
|
-
|
134
|
-
# Extracts the content of the first comment range twice - once including the
# comment range nodes and once without them - and saves each result as its
# own document.
#
# @param doc [Object] Rjb proxy of the source com.aspose.words.Document
def extract_content_between_comment_range(doc)
  # Quick way to get a pair of comment range nodes: take the first of each
  # kind. Real code should match each start node with its own end node.
  node_type = Rjb::import("com.aspose.words.NodeType")
  range_start = doc.getChild(node_type.COMMENT_RANGE_START, 0, true)
  range_end = doc.getChild(node_type.COMMENT_RANGE_END, 0, true)

  # First pass: comment range markers included.
  with_markers = extract_contents(range_start, range_end, true)
  generate_document(doc, with_markers).save(@data_dir + "TestFile.CommentInclusive Out.doc")

  # Second pass: comment range markers excluded.
  without_markers = extract_contents(range_start, range_end, false)
  generate_document(doc, without_markers).save(@data_dir + "TestFile.CommentExclusive Out.doc")
end
|
151
|
-
|
152
|
-
# Extracts copies of the content found between two marker nodes.
#
# Content can be extracted between inline nodes, block level nodes and
# special nodes such as comment ranges or bookmarks, in any combination.
# The source document is not changed here; every returned node is a deep clone.
#
# @param startNode [Object] node where the extraction starts; block or inline
#   level node of a body
# @param endNode [Object] node where the extraction stops; block or inline
#   level node of a body
# @param isInclusive [Boolean] whether the marker nodes themselves are included
# @return [Object] Rjb proxy of a java.util.ArrayList holding the cloned nodes
def extract_contents(startNode, endNode, isInclusive)
  # Reject unusable markers before doing any work.
  verify_parameter_nodes(startNode, endNode)

  # Collected clones, in document order.
  nodes = Rjb::import("java.util.ArrayList").new

  # The markers as passed in; needed later to split the first and last block.
  originalStartNode = startNode
  originalEndNode = endNode

  node_type = Rjb::import("com.aspose.words.NodeType")

  # Walk both markers up to their block level ancestor (a direct child of the body).
  startNode = startNode.getParentNode() until startNode.getParentNode().getNodeType() == node_type.BODY
  endNode = endNode.getParentNode() until endNode.getParentNode().getNodeType() == node_type.BODY

  isStartingNode = true
  # The block level node currently being copied.
  currNode = startNode

  # Copy block by block. The first and last block go through process_marker so
  # that inline markers (runs, fields, bookmarks, ...) can split them while the
  # paragraph formatting is retained.
  loop do
    cloneNode = currNode.deepClone(true)
    isEndingNode = currNode.equals(endNode)

    if isStartingNode || isEndingNode
      # The two checks stay separate because the start and end block may be
      # one and the same node.
      if isStartingNode
        process_marker(cloneNode, nodes, originalStartNode, isInclusive, isStartingNode, isEndingNode)
        isStartingNode = false
      end
      if isEndingNode
        process_marker(cloneNode, nodes, originalEndNode, isInclusive, isStartingNode, isEndingNode)
        # The end block was handled: extraction is complete.
        break
      end
    else
      # Neither the first nor the last block: take the copy as it is.
      nodes.add(cloneNode)
    end

    # Advance. Without a next sibling the remaining content lives in the next
    # section, so continue with the first child of that section's body.
    following = currNode.getNextSibling()
    currNode = if following.nil?
                 currNode.getAncestor(node_type.SECTION).getNextSibling().getBody().getFirstChild()
               else
                 following
               end
  end

  # The clones found between the two markers.
  nodes
end
|
229
|
-
|
230
|
-
# Checks that the two marker nodes can be used for an extraction.
#
# The order of the checks matters: later checks rely on earlier ones.
#
# @param startNode [Object] Rjb proxy of the node where extraction starts
# @param endNode [Object] Rjb proxy of the node where extraction stops
# @raise [RuntimeError] if a node is nil, the nodes belong to different
#   documents, a node has no body ancestor, or the end node comes before the
#   start node
def verify_parameter_nodes(startNode, endNode)
  raise 'Start node cannot be null' if startNode.nil?
  raise 'End node cannot be null' if endNode.nil?

  # Raise when the documents DIFFER. Java's equals is used, as extract_contents
  # does for nodes; NOTE(review): Ruby's == on Rjb proxies presumably compares
  # the Ruby wrappers rather than the Java objects - confirm.
  unless startNode.getDocument().equals(endNode.getDocument())
    raise "Start node and end node must belong to the same document"
  end

  nodeType = Rjb::import("com.aspose.words.NodeType")
  if startNode.getAncestor(nodeType.BODY).nil? || endNode.getAncestor(nodeType.BODY).nil?
    raise "Start node and end node must be a child or descendant of a body"
  end

  # Check the end node is after the start node in the DOM tree: compare the
  # positions of their sections first and, when both are in the same section,
  # their positions inside that section's body.
  startSection = startNode.getAncestor(nodeType.SECTION)
  endSection = endNode.getAncestor(nodeType.SECTION)
  startIndex = startSection.getParentNode().indexOf(startSection)
  endIndex = endSection.getParentNode().indexOf(endSection)

  if startIndex == endIndex
    if startSection.getBody().indexOf(startNode) > endSection.getBody().indexOf(endNode)
      raise "The end node must be after the start node in the body"
    end
  elsif startIndex > endIndex
    raise "The section of end node must be after the section start node"
  end
end
|
256
|
-
|
257
|
-
# Builds a new document that contains imported copies of the given nodes.
#
# @param src_doc [Object] Rjb proxy of the document the nodes come from
# @param nodes [Object] Rjb proxy of a list of nodes (read via size and get)
# @return [Object] Rjb proxy of the newly created com.aspose.words.Document
def generate_document(src_doc, nodes)
  # Start from a blank document and clear out the body of its first section.
  dst_doc = Rjb::import("com.aspose.words.Document").new
  dst_doc.getFirstSection().getBody().removeAllChildren()

  # Nodes have to be imported into the new document; keep their source formatting.
  keep_source = Rjb::import("com.aspose.words.ImportFormatMode").KEEP_SOURCE_FORMATTING
  importer = Rjb::import("com.aspose.words.NodeImporter").new(src_doc, dst_doc, keep_source)

  # Import the nodes one by one, in list order, and append them to the body.
  nodes.size.times do |index|
    imported = importer.importNode(nodes.get(index), true)
    dst_doc.getFirstSection().getBody().appendChild(imported)
  end

  # Hand back the generated document.
  dst_doc
end
|
279
|
-
|
280
|
-
# Trims the clone of a block-level node that contains a start or end marker
# and appends the clone to +nodes+ when appropriate.
#
# cloneNode     - deep clone of the body-level node that holds the marker.
# nodes         - list (responds to +add+) collecting the extracted clones.
# node          - the original marker node in the source document.
# isInclusive   - whether the marker itself is kept in the extraction.
# isStartMarker - true when +node+ is the start marker.
# isEndMarker   - true when +cloneNode+ is (also) the block holding the end marker.
#
# Returns nothing meaningful; the result is the mutation of +cloneNode+ and +nodes+.
def process_marker(cloneNode, nodes, node, isInclusive, isStartMarker, isEndMarker)
  # A block-level marker needs no trimming: just decide whether to include it.
  unless is_inline(node)
    # Don't add the node twice if the markers are the same node.
    nodes.add(cloneNode) if isInclusive && !(isStartMarker && isEndMarker)
    return
  end

  node_type = Rjb::import("com.aspose.words.NodeType")

  # If a marker is a FieldStart node check if it's to be included or not.
  # We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
  if node.getNodeType() == node_type.FIELD_START
    # An excluded start marker skips to the end of the field; an included end
    # marker moves to the field end so the whole field is kept.
    if (isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)
      while !node.getNextSibling().nil? && node.getNodeType() != node_type.FIELD_END
        node = node.getNextSibling()
      end
    end
  end

  # If the marker is a CommentRangeEnd, move forward to the Comment node that
  # follows it so the comment itself can be included.
  if node.getNodeType() == node_type.COMMENT_RANGE_END
    while !node.getNextSibling().nil? && node.getNodeType() != node_type.COMMENT
      node = node.getNextSibling()
    end
  end

  # Find the corresponding node in the clone by index. When start and end
  # markers share a block, children may already have been removed from the
  # clone, so shift the index by the difference in child counts.
  parent = node.getParentNode()
  index_diff = parent.getChildNodes().getCount() - cloneNode.getChildNodes().getCount()
  marker = cloneNode.getChildNodes().get(parent.indexOf(node) - index_diff)

  # Remove the nodes up to (start marker) or after (end marker) the marker.
  processing = true
  removing = isStartMarker
  next_node = cloneNode.getFirstChild()

  while processing && !next_node.nil?
    current = next_node
    skip = false

    # Java identity comparison; Ruby's == would compare the wrapper objects.
    if current.equals(marker)
      if isStartMarker
        processing = false
        removing = false if isInclusive
      else
        removing = true
        skip = true if isInclusive
      end
    end

    # Fetch the sibling before a possible removal detaches +current+.
    next_node = current.getNextSibling()
    current.remove() if removing && !skip
  end

  # After trimming the clone may be empty; only keep it when it still has
  # content. When both markers share this block it is added by the end-marker pass.
  nodes.add(cloneNode) if !(isStartMarker && isEndMarker) && cloneNode.hasChildNodes()
end
|
363
|
-
|
364
|
-
# Tells whether +node+ is an inline-level node.
#
# A node is inline when it is a descendant of a Paragraph or Table node and is
# not itself a Paragraph or Table. The second condition matters because a
# paragraph can sit inside a comment, which is itself a descendant of a paragraph.
#
# Returns true or false.
def is_inline(node)
  node_type = Rjb::import("com.aspose.words.NodeType")

  has_block_ancestor = !node.getAncestor(node_type.PARAGRAPH).nil? ||
                       !node.getAncestor(node_type.TABLE).nil?
  current_type = node.getNodeType()
  is_block_node = current_type == node_type.PARAGRAPH || current_type == node_type.TABLE

  has_block_ancestor && !is_block_node
end
|
370
|
-
|
371
|
-
# Collects every paragraph of +doc+ whose paragraph style is named +style_name+.
#
# doc        - document node; all of its descendant paragraphs are inspected.
# style_name - style name to match exactly, e.g. "Heading 1".
#
# Returns a java.util.ArrayList of the matching paragraphs in document order.
def paragraphs_by_style_name(doc, style_name)
  # Create a list to collect paragraphs of the specified style.
  matching = Rjb::import("java.util.ArrayList").new

  # Get all paragraphs from the document once; the collection does not change
  # while it is being scanned.
  node_type = Rjb::import("com.aspose.words.NodeType")
  paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)

  # Look through all paragraphs to find those with the specified style.
  paragraphs.getCount().times do |i|
    paragraph = paragraphs.get(i)
    matching.add(paragraph) if paragraph.getParagraphFormat().getStyle().getName() == style_name
  end

  matching
end
|
393
|
-
|
394
|
-
end
|
395
|
-
end
|
1
|
+
module Asposewordsjavaforruby
  # Examples of extracting ranges of content from a Word document with
  # Aspose.Words for Java, called from Ruby through Rjb.
  #
  # NOTE(review): the module defines +initialize+, so it is presumably mixed
  # into a class whose construction runs every example; confirm against the caller.
  module ExtractContent
    # Opens data/document/TestFile.doc and runs each extraction example on it.
    def initialize
      # The path to the documents directory.
      @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/document/'

      # Open the document.
      doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestFile.doc")

      extract_content_between_paragraphs(doc)
      extract_content_between_block_level_nodes(doc)
      extract_content_between_paragraph_styles(doc)
      extract_content_between_runs(doc)
      extract_content_using_field(doc)
      extract_content_between_bookmark(doc)
      extract_content_between_comment_range(doc)
    end

    # Extracts paragraphs 6..10 (0-based) of the first section, markers
    # included, and saves them as a new document.
    def extract_content_between_paragraphs(doc)
      # Gather the nodes. The getChild method uses a 0-based index.
      node_type = Rjb::import("com.aspose.words.NodeType")
      start_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 6, true)
      end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 10, true)

      # Extract the content between these nodes in the document. Include these markers in the extraction.
      extracted_nodes = extract_contents(start_para, end_para, true)

      # Insert the content into a new separate document and save it to disk.
      dst_doc = generate_document(doc, extracted_nodes)
      dst_doc.save(@data_dir + "TestFile.Paragraphs Out.doc")
    end

    # Extracts the content between a paragraph and a table of the last section
    # and re-inserts a copy of it after the table, then saves the document.
    def extract_content_between_block_level_nodes(doc)
      # Gather the nodes. The getChild method uses a 0-based index.
      node_type = Rjb::import("com.aspose.words.NodeType")
      start_para = doc.getLastSection().getChild(node_type.PARAGRAPH, 2, true)
      end_table = doc.getLastSection().getChild(node_type.TABLE, 0, true)

      # Extract the content between these nodes in the document. Include these markers in the extraction.
      extracted_nodes = extract_contents(start_para, end_table, true)

      # Reverse the list: each node is inserted directly after the table, so
      # inserting in reverse order leaves the copies in their original order.
      collections = Rjb::import("java.util.Collections")
      collections.reverse(extracted_nodes)

      while extracted_nodes.size() > 0 do
        # Insert the first node of the reversed list.
        end_table.getParentNode().insertAfter(extracted_nodes.get(0), end_table)
        # Remove this node from the list after insertion.
        extracted_nodes.remove(0)
      end

      # Save the generated document to disk.
      doc.save(@data_dir + "TestFile.DuplicatedContent Out.doc")
    end

    # Extracts the content between the first "Heading 1" paragraph and the
    # first "Heading 3" paragraph, markers excluded, and saves it.
    def extract_content_between_paragraph_styles(doc)
      # Gather a list of the paragraphs using the respective heading styles.
      paras_style_heading1 = paragraphs_by_style_name(doc, "Heading 1")
      paras_style_heading3 = paragraphs_by_style_name(doc, "Heading 3")

      # Use the first instance of the paragraphs with those styles.
      start_para1 = paras_style_heading1.get(0)
      end_para1 = paras_style_heading3.get(0)

      # Extract the content between these nodes in the document. Don't include these markers in the extraction.
      extracted_nodes = extract_contents(start_para1, end_para1, false)

      # Insert the content into a new separate document and save it to disk.
      dst_doc = generate_document(doc, extracted_nodes)
      dst_doc.save(@data_dir + "TestFile.Styles Out.doc")
    end

    # Extracts the content between two runs of one paragraph, markers
    # included, and prints its text to the console.
    def extract_content_between_runs(doc)
      # Retrieve a paragraph from the document.
      node_type = Rjb::import("com.aspose.words.NodeType")
      para = doc.getChild(node_type.PARAGRAPH, 7, true)

      # Use some runs for extraction.
      start_run = para.getRuns().get(1)
      end_run = para.getRuns().get(4)

      # Extract the content between these nodes in the document. Include these markers in the extraction.
      extracted_nodes = extract_contents(start_run, end_run, true)

      # Get the node from the list. There should only be one paragraph returned in the list.
      node = extracted_nodes.get(0)

      # Print the text of this node to the console.
      save_format = Rjb::import("com.aspose.words.SaveFormat")
      puts node.toString(save_format.TEXT)
    end

    # Extracts the content between the "Fullname" merge field and a paragraph
    # of the first section, markers excluded, and saves it.
    def extract_content_using_field(doc)
      # Use a document builder to retrieve the field start of a merge field.
      builder = Rjb::import("com.aspose.words.DocumentBuilder").new(doc)

      # Pass the first boolean parameter to get the DocumentBuilder to move to the FieldStart of the field.
      # We could also get FieldStarts of a field using the getChild method as in the other examples.
      builder.moveToMergeField("Fullname", false, false)

      # The builder cursor should be positioned at the start of the field.
      node_type = Rjb::import("com.aspose.words.NodeType")
      start_field = builder.getCurrentNode()
      end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 5, true)

      # Extract the content between these nodes in the document. Don't include these markers in the extraction.
      extracted_nodes = extract_contents(start_field, end_para, false)

      # Insert the content into a new separate document and save it to disk.
      dst_doc = generate_document(doc, extracted_nodes)
      dst_doc.save(@data_dir + "TestFile.Fields Out.doc")
    end

    # Extracts the content of "Bookmark1" twice, with and without the bookmark
    # nodes, and saves each result as its own document.
    def extract_content_between_bookmark(doc)
      # Retrieve the bookmark from the document.
      bookmark = doc.getRange().getBookmarks().get("Bookmark1")

      # We use the BookmarkStart and BookmarkEnd nodes as markers.
      bookmark_start = bookmark.getBookmarkStart()
      bookmark_end = bookmark.getBookmarkEnd()

      # Firstly extract the content between these nodes including the bookmark.
      extracted_nodes_inclusive = extract_contents(bookmark_start, bookmark_end, true)
      dst_doc = generate_document(doc, extracted_nodes_inclusive)
      dst_doc.save(@data_dir + "TestFile.BookmarkInclusive Out.doc")

      # Secondly extract the content between these nodes this time without including the bookmark.
      extracted_nodes_exclusive = extract_contents(bookmark_start, bookmark_end, false)
      dst_doc = generate_document(doc, extracted_nodes_exclusive)
      dst_doc.save(@data_dir + "TestFile.BookmarkExclusive Out.doc")
    end

    # Extracts the content of the first comment range twice, with and without
    # the comment, and saves each result as its own document.
    def extract_content_between_comment_range(doc)
      # This is a quick way of getting both comment range nodes.
      # Your code should have a proper method of retrieving each corresponding start and end node.
      node_type = Rjb::import("com.aspose.words.NodeType")
      comment_start = doc.getChild(node_type.COMMENT_RANGE_START, 0, true)
      comment_end = doc.getChild(node_type.COMMENT_RANGE_END, 0, true)

      # Firstly extract the content between these nodes including the comment.
      extracted_nodes_inclusive = extract_contents(comment_start, comment_end, true)
      dst_doc = generate_document(doc, extracted_nodes_inclusive)
      dst_doc.save(@data_dir + "TestFile.CommentInclusive Out.doc")

      # Secondly extract the content between these nodes this time without including the comment.
      extracted_nodes_exclusive = extract_contents(comment_start, comment_end, false)
      dst_doc = generate_document(doc, extracted_nodes_exclusive)
      dst_doc.save(@data_dir + "TestFile.CommentExclusive Out.doc")
    end

    # Extracts the range of nodes found between two markers and returns copies
    # of those nodes. Content can be extracted between inline nodes, block
    # level nodes, and special nodes such as comments or bookmarks; any
    # combination of marker types can be used.
    #
    # startNode   - node where the extraction starts (block or inline level of a body).
    # endNode     - node where the extraction stops (block or inline level of a body).
    # isInclusive - whether the marker nodes themselves are included.
    #
    # Returns a java.util.ArrayList of cloned body-level nodes.
    # Raises RuntimeError when the markers are invalid (see verify_parameter_nodes)
    # or when the end node cannot be reached from the start node.
    def extract_contents(startNode, endNode, isInclusive)
      # First check that the nodes passed to this method are valid for use.
      verify_parameter_nodes(startNode, endNode)

      # Create a list to store the extracted nodes.
      nodes = Rjb::import("java.util.ArrayList").new

      # Keep the original markers so the first and last blocks can be split if needed.
      original_start = startNode
      original_end = endNode

      # Extraction works on block level nodes (paragraphs and tables), so climb
      # from each marker to its ancestor that is a direct child of the body.
      node_type = Rjb::import("com.aspose.words.NodeType")
      while startNode.getParentNode().getNodeType() != node_type.BODY
        startNode = startNode.getParentNode()
      end
      while endNode.getParentNode().getNodeType() != node_type.BODY
        endNode = endNode.getParentNode()
      end

      extracting = true
      starting = true
      # The current node we are extracting from the document.
      curr_node = startNode

      # Process all block level nodes, splitting the first and last ones when
      # the markers are inline so paragraph formatting is retained.
      while extracting
        # Clone the current node and its children to obtain a copy.
        clone_node = curr_node.deepClone(true)
        ending = curr_node.equals(endNode)

        if starting || ending
          # Each marker is processed separately.
          if starting
            process_marker(clone_node, nodes, original_start, isInclusive, starting, ending)
            starting = false
          end
          # Kept as a separate conditional because the block level start and
          # end markers may be the same node.
          if ending
            process_marker(clone_node, nodes, original_end, isInclusive, starting, ending)
            extracting = false
          end
        else
          # Node is not a start or end marker, simply add the copy to the list.
          nodes.add(clone_node)
        end

        # Move to the next node. A missing sibling while still extracting means
        # the rest of the content is found in the next section.
        next_node = curr_node.getNextSibling()
        if next_node.nil? && extracting
          next_section = curr_node.getAncestor(node_type.SECTION).getNextSibling()
          raise "The end node was not found after the start node" if next_section.nil?
          curr_node = next_section.getBody().getFirstChild()
        else
          curr_node = next_node
        end
      end

      # Return the nodes between the node markers.
      nodes
    end

    # Checks that the marker nodes can be used for an extraction.
    #
    # Raises RuntimeError when a node is nil, the nodes belong to different
    # documents, a node is not inside a body, or the end node precedes the
    # start node.
    def verify_parameter_nodes(startNode, endNode)
      # The order in which these checks are done is important.
      raise 'Start node cannot be null' if startNode.nil?
      raise 'End node cannot be null' if endNode.nil?
      # Java identity comparison; Ruby's == would compare the wrapper objects.
      unless startNode.getDocument().equals(endNode.getDocument())
        raise "Start node and end node must belong to the same document"
      end

      node_type = Rjb::import("com.aspose.words.NodeType")
      if startNode.getAncestor(node_type.BODY).nil? || endNode.getAncestor(node_type.BODY).nil?
        raise "Start node and end node must be a child or descendant of a body"
      end

      # Check the end node is after the start node in the DOM tree.
      # First compare their sections; if they share one, compare their
      # positions in that section's body.
      start_section = startNode.getAncestor(node_type.SECTION)
      end_section = endNode.getAncestor(node_type.SECTION)
      start_index = start_section.getParentNode().indexOf(start_section)
      end_index = end_section.getParentNode().indexOf(end_section)

      if start_index == end_index
        if start_section.getBody().indexOf(startNode) > end_section.getBody().indexOf(endNode)
          raise "The end node must be after the start node in the body"
        end
      elsif start_index > end_index
        raise "The section of end node must be after the section start node"
      end
    end

    # Builds a new document containing imported copies of +nodes+.
    #
    # src_doc - document the nodes were extracted from.
    # nodes   - list (responds to +size+ and +get+) of nodes to import.
    #
    # Returns the generated document.
    def generate_document(src_doc, nodes)
      # Create a blank document and remove the first paragraph from it.
      dst_doc = Rjb::import("com.aspose.words.Document").new
      dst_body = dst_doc.getFirstSection().getBody()
      dst_body.removeAllChildren()

      # Import each node from the list into the new document. Keep the original formatting of the node.
      import_format_mode = Rjb::import("com.aspose.words.ImportFormatMode")
      importer = Rjb::import("com.aspose.words.NodeImporter").new(src_doc, dst_doc, import_format_mode.KEEP_SOURCE_FORMATTING)

      nodes.size.times do |i|
        dst_body.appendChild(importer.importNode(nodes.get(i), true))
      end

      # Return the generated document.
      dst_doc
    end

    # Trims the clone of a block-level node that contains a start or end
    # marker and appends the clone to +nodes+ when appropriate.
    #
    # cloneNode     - deep clone of the body-level node that holds the marker.
    # nodes         - list (responds to +add+) collecting the extracted clones.
    # node          - the original marker node in the source document.
    # isInclusive   - whether the marker itself is kept in the extraction.
    # isStartMarker - true when +node+ is the start marker.
    # isEndMarker   - true when +cloneNode+ is (also) the block holding the end marker.
    def process_marker(cloneNode, nodes, node, isInclusive, isStartMarker, isEndMarker)
      # A block-level marker needs no trimming: just decide whether to include it.
      unless is_inline(node)
        # Don't add the node twice if the markers are the same node.
        nodes.add(cloneNode) if isInclusive && !(isStartMarker && isEndMarker)
        return
      end

      node_type = Rjb::import("com.aspose.words.NodeType")

      # If a marker is a FieldStart node check if it's to be included or not.
      # We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
      if node.getNodeType() == node_type.FIELD_START
        # An excluded start marker skips to the end of the field; an included
        # end marker moves to the field end so the whole field is kept.
        if (isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)
          while !node.getNextSibling().nil? && node.getNodeType() != node_type.FIELD_END
            node = node.getNextSibling()
          end
        end
      end

      # If the marker is a CommentRangeEnd, move forward to the Comment node
      # that follows it so the comment itself can be included.
      if node.getNodeType() == node_type.COMMENT_RANGE_END
        while !node.getNextSibling().nil? && node.getNodeType() != node_type.COMMENT
          node = node.getNextSibling()
        end
      end

      # Find the corresponding node in the clone by index. When start and end
      # markers share a block, children may already have been removed from the
      # clone, so shift the index by the difference in child counts.
      parent = node.getParentNode()
      index_diff = parent.getChildNodes().getCount() - cloneNode.getChildNodes().getCount()
      marker = cloneNode.getChildNodes().get(parent.indexOf(node) - index_diff)

      # Remove the nodes up to (start marker) or after (end marker) the marker.
      processing = true
      removing = isStartMarker
      next_node = cloneNode.getFirstChild()

      while processing && !next_node.nil?
        current = next_node
        skip = false

        # Java identity comparison; Ruby's == would compare the wrapper objects.
        if current.equals(marker)
          if isStartMarker
            processing = false
            removing = false if isInclusive
          else
            removing = true
            skip = true if isInclusive
          end
        end

        # Fetch the sibling before a possible removal detaches +current+.
        next_node = current.getNextSibling()
        current.remove() if removing && !skip
      end

      # After trimming the clone may be empty; only keep it when it still has
      # content. When both markers share this block it is added by the
      # end-marker pass.
      nodes.add(cloneNode) if !(isStartMarker && isEndMarker) && cloneNode.hasChildNodes()
    end

    # Tells whether +node+ is an inline-level node: a descendant of a
    # Paragraph or Table node that is not itself a Paragraph or Table. The
    # second condition matters because a paragraph can sit inside a comment,
    # which is itself a descendant of a paragraph.
    #
    # Returns true or false.
    def is_inline(node)
      node_type = Rjb::import("com.aspose.words.NodeType")

      has_block_ancestor = !node.getAncestor(node_type.PARAGRAPH).nil? ||
                           !node.getAncestor(node_type.TABLE).nil?
      current_type = node.getNodeType()
      is_block_node = current_type == node_type.PARAGRAPH || current_type == node_type.TABLE

      has_block_ancestor && !is_block_node
    end

    # Collects every paragraph of +doc+ whose paragraph style is named +style_name+.
    #
    # Returns a java.util.ArrayList of the matching paragraphs in document order.
    def paragraphs_by_style_name(doc, style_name)
      # Create a list to collect paragraphs of the specified style.
      matching = Rjb::import("java.util.ArrayList").new

      # Get all paragraphs from the document once; the collection does not
      # change while it is being scanned.
      node_type = Rjb::import("com.aspose.words.NodeType")
      paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)

      # Look through all paragraphs to find those with the specified style.
      paragraphs.getCount().times do |i|
        paragraph = paragraphs.get(i)
        matching.add(paragraph) if paragraph.getParagraphFormat().getStyle().getName() == style_name
      end

      matching
    end
  end
end
|