asposewordsjavaforruby 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +7 -0
  2. data/Gempackage +2 -2
  3. data/LICENSE +20 -20
  4. data/README.md +28 -2
  5. data/Rakefile +2 -2
  6. data/asposewordsjavaforruby.gemspec +27 -27
  7. data/config/aspose.yml +5 -5
  8. data/data/LoadTxt.txt +14 -14
  9. data/lib/asposewordsjavaforruby.rb +71 -71
  10. data/lib/asposewordsjavaforruby/addwatermark.rb +84 -84
  11. data/lib/asposewordsjavaforruby/appenddoc.rb +24 -24
  12. data/lib/asposewordsjavaforruby/appenddocument.rb +229 -229
  13. data/lib/asposewordsjavaforruby/applylicense.rb +16 -16
  14. data/lib/asposewordsjavaforruby/asposewordsjava.rb +23 -23
  15. data/lib/asposewordsjavaforruby/autofittables.rb +123 -123
  16. data/lib/asposewordsjavaforruby/bookmarks.rb +132 -132
  17. data/lib/asposewordsjavaforruby/checkformat.rb +70 -70
  18. data/lib/asposewordsjavaforruby/compressimages.rb +53 -53
  19. data/lib/asposewordsjavaforruby/doc2pdf.rb +15 -15
  20. data/lib/asposewordsjavaforruby/doctohtml.rb +26 -26
  21. data/lib/asposewordsjavaforruby/extractcontent.rb +395 -395
  22. data/lib/asposewordsjavaforruby/findandreplace.rb +29 -29
  23. data/lib/asposewordsjavaforruby/helloworld.rb +26 -26
  24. data/lib/asposewordsjavaforruby/imagetopdf.rb +71 -71
  25. data/lib/asposewordsjavaforruby/insertnestedfields.rb +39 -39
  26. data/lib/asposewordsjavaforruby/loadandsavetodisk.rb +20 -20
  27. data/lib/asposewordsjavaforruby/loadandsavetostream.rb +32 -32
  28. data/lib/asposewordsjavaforruby/loadtxt.rb +14 -14
  29. data/lib/asposewordsjavaforruby/mergefield.rb +45 -45
  30. data/lib/asposewordsjavaforruby/nodes.rb +29 -29
  31. data/lib/asposewordsjavaforruby/processcomments.rb +72 -72
  32. data/lib/asposewordsjavaforruby/removebreaks.rb +65 -65
  33. data/lib/asposewordsjavaforruby/removefield.rb +23 -23
  34. data/lib/asposewordsjavaforruby/saveasmultipagetiff.rb +26 -26
  35. data/lib/asposewordsjavaforruby/simplemailmerge.rb +23 -23
  36. data/lib/asposewordsjavaforruby/styles.rb +77 -77
  37. data/lib/asposewordsjavaforruby/updatefields.rb +62 -62
  38. data/lib/asposewordsjavaforruby/version.rb +3 -3
  39. metadata +18 -28
@@ -1,70 +1,70 @@
require 'fileutils'

module Asposewordsjavaforruby
  # Sample mixin: detects the load format of every file in a folder using
  # Aspose.Words (reached through the Rjb Java bridge), prints a description
  # of each format and copies the file into the "OutSupported" folder.
  module CheckFormat
    # Builds the sample folder paths relative to this file and runs the check
    # on the "joiningandappending" data folder.
    def initialize
      # The path to the documents directory.
      data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'

      @supported_dir = data_dir + 'OutSupported/'
      source_dir = Rjb::import('java.io.File').new(data_dir + 'joiningandappending/')

      check_fromat(source_dir)
    end

    # Prints the name, path and detected format of each direct child file of
    # +file+ and copies it into @supported_dir. Sub-directories are skipped.
    #
    # The misspelled method name is kept so existing callers keep working;
    # +check_format+ is provided as a correctly spelled alias.
    #
    # @param file [Object] java.io.File proxy (or any object responding to
    #   +listFiles+) for the directory to scan
    def check_fromat(file)
      load_format = Rjb::import('com.aspose.words.LoadFormat')
      # Imported once instead of once per file.
      format_util = Rjb::import('com.aspose.words.FileFormatUtil')

      # java.io.File#listFiles yields null (nil here) when the path is not a
      # readable directory; treat that as "nothing to check".
      entries = file.listFiles() || []

      entries.each do |entry|
        next if entry.isDirectory()

        name_only = entry.getName()
        puts name_only
        file_name = entry.getPath()
        puts file_name

        info = format_util.detectFileFormat(file_name)
        puts describe_load_format(load_format, info.getLoadFormat())

        # Make sure the output folder exists before copying into it.
        FileUtils.mkdir_p(@supported_dir)
        FileUtils.cp(file_name, File.join(@supported_dir, name_only))
      end
    end
    alias_method :check_format, :check_fromat

    private

    # Maps a detected load-format value to the text printed for it.
    # Anything not listed (including LoadFormat.UNKNOWN) is reported as unknown.
    #
    # @param load_format [Object] imported com.aspose.words.LoadFormat class
    # @param detected [Object] value returned by FileFormatInfo#getLoadFormat
    # @return [String] human-readable description
    def describe_load_format(load_format, detected)
      case detected
      when load_format.DOC
        "Microsoft Word 97-2003 document."
      when load_format.DOT
        "Microsoft Word 97-2003 template."
      when load_format.DOCX
        "Office Open XML WordprocessingML Macro-Free Document."
      when load_format.DOCM
        "Office Open XML WordprocessingML Macro-Enabled Document."
      when load_format.DOTX
        "Office Open XML WordprocessingML Macro-Free Template."
      when load_format.DOTM
        "Office Open XML WordprocessingML Macro-Enabled Template."
      when load_format.FLAT_OPC
        "Flat OPC document."
      when load_format.RTF
        "RTF format."
      when load_format.WORD_ML
        "Microsoft Word 2003 WordprocessingML format."
      when load_format.HTML
        "HTML format."
      when load_format.MHTML
        "MHTML (Web archive) format."
      when load_format.ODT
        "OpenDocument Text."
      when load_format.OTT
        "OpenDocument Text Template."
      when load_format.DOC_PRE_WORD_97
        "MS Word 6 or Word 95 format."
      else
        "Unknown format."
      end
    end
  end
end
@@ -1,53 +1,53 @@
module Asposewordsjavaforruby
  # Sample mixin: resamples the images of a Word document to a lower
  # resolution, saves the result and checks the resolution of the first image.
  module CompressImages
    # Opens data/TestCompressImages.docx (relative to this file) and runs the
    # compression sample on it.
    def initialize
      # The path to the documents directory.
      @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
      srcFileName = @data_dir + "TestCompressImages.docx"

      doc = Rjb::import('com.aspose.words.Document').new(srcFileName)

      # Demonstrate compressing the images of the document.
      compress_images(doc, srcFileName)
    end

    # Resamples the images in +doc+, saves it as "TestCompressImages Out.docx"
    # inside @data_dir, and prints progress plus a warning when the first
    # image of the saved file ends up below the desired resolution.
    #
    # @param doc [Object] Aspose.Words Document proxy to compress
    # @param srcFileName [String] path +doc+ was loaded from; accepted for
    #   interface compatibility, not needed by the current implementation
    def compress_images(doc, srcFileName)
      # 220ppi Print - said to be excellent on most printers and screens.
      # 150ppi Screen - said to be good for web pages and projectors.
      # 96ppi Email - said to be good for minimal document size and sharing.
      desired_ppi = 150
      # In Java this seems to be a good compression / quality setting.
      jpeg_quality = 90

      # Resample images to desired ppi and save.
      # NOTE(review): com.aspose.words.Resampler must be available on the Java
      # classpath - confirm it ships with the jars this gem loads.
      resampler = Rjb::import("com.aspose.words.Resampler").new
      count = resampler.resample(doc, desired_ppi, jpeg_quality)

      # Plain Ruby interpolation replaces the former MessageFormat calls, the
      # first of which referenced an undefined constant and raised NameError.
      puts "Resampled #{count} images."
      puts "We expected to have only 1 image resampled in this test document!" if count != 1

      dst_file_name = @data_dir + "TestCompressImages Out.docx"
      doc.save(dst_file_name)
      puts "Saving #{dst_file_name}. Size #{get_file_size(dst_file_name)}."

      # Verify that the first image was compressed by checking the new Ppi.
      dst_doc = Rjb::import("com.aspose.words.Document").new(dst_file_name)
      node_type = Rjb::import("com.aspose.words.NodeType")
      shape = dst_doc.getChild(node_type.DRAWING_ML, 0, true)
      convert_util = Rjb::import("com.aspose.words.ConvertUtil")

      width_px = shape.getImageData().getImageSize().getWidthPixels()
      width_in = convert_util.pointToInch(shape.getSize().getX())
      image_ppi = width_px / width_in

      # Compare against the resolution requested above rather than a second
      # hard-coded copy of it.
      puts "Image was not resampled successfully." if image_ppi < desired_ppi
    end

    # Returns the size of the file at +file_name+ as reported by
    # java.io.File#length.
    #
    # @param file_name [String] path of the file to measure
    # @return [Integer] value returned by java.io.File#length
    def get_file_size(file_name)
      Rjb::import("java.io.File").new(file_name).length()
    end
  end
end
@@ -1,15 +1,15 @@
module Asposewordsjavaforruby
  # Sample mixin that converts the bundled Word template to a PDF file.
  module Doc2Pdf
    # Loads "Template.doc" from the data folder (three directory levels above
    # this file, plus "/data/") and saves it next to the source as
    # "Doc2PdfSave Out.pdf".
    #
    # @return [Object] whatever the Document proxy's +save+ call returns
    def doc_to_pdf
      project_root = 3.times.inject(__FILE__) { |path, _| File.dirname(path) }
      data_dir = project_root + '/data/'

      # Open document.
      word_document = Rjb::import('com.aspose.words.Document')
      template = word_document.new(data_dir + "Template.doc")

      # Save the document in PDF format.
      template.save(data_dir + "Doc2PdfSave Out.pdf")
    end
  end
end
@@ -1,26 +1,26 @@
module Asposewordsjavaforruby
  # Sample mixin: exports a Word document to HTML with round-trip information
  # and then converts that HTML back into a DOCX file.
  module DocToHTML
    # Runs the whole round trip against the data folder located three
    # directory levels above this file.
    def initialize
      # The path to the documents directory.
      project_root = File.dirname(File.dirname(File.dirname(__FILE__)))
      data_dir = project_root + '/data/'
      html_path = data_dir + "ExportRoundtripInformation Out.html"

      # Open the document.
      document_class = Rjb::import('com.aspose.words.Document')
      source_doc = document_class.new(data_dir + "TestFile.doc")

      # HtmlSaveOptions.ExportRoundtripInformation property specifies
      # whether to write the roundtrip information when saving to HTML, MHTML or EPUB.
      # Default value is true for HTML and false for MHTML and EPUB.
      html_options = Rjb::import('com.aspose.words.HtmlSaveOptions').new
      html_options.setExportRoundtripInformation(true)
      source_doc.save(html_path, html_options)

      # Re-open the HTML that was just written.
      html_doc = document_class.new(html_path)

      # Save the document in Docx file format.
      docx_format = Rjb::import('com.aspose.words.SaveFormat').DOCX
      html_doc.save(data_dir + "Out.docx", docx_format)
    end
  end
end
@@ -1,395 +1,395 @@
1
- module Asposewordsjavaforruby
2
- module ExtractContent
3
- def initialize()
4
- # The path to the documents directory.
5
- @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/document/'
6
-
7
- # Open the document.
8
- doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestFile.doc")
9
-
10
- extract_content_between_paragraphs(doc)
11
- extract_content_between_block_level_nodes(doc)
12
- extract_content_between_paragraph_styles(doc)
13
- extract_content_between_runs(doc)
14
- extract_content_using_field(doc)
15
- extract_content_between_bookmark(doc)
16
- extract_content_between_comment_range(doc)
17
- end
18
-
19
- def extract_content_between_paragraphs(doc)
20
- # Gather the nodes. The GetChild method uses 0-based index
21
- node_type = Rjb::import("com.aspose.words.NodeType")
22
- start_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 6, true)
23
- end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 10, true)
24
-
25
- # Extract the content between these nodes in the document. Include these markers in the extraction.
26
- extracted_nodes = extract_contents(start_para, end_para, true)
27
-
28
- # Insert the content into a new separate document and save it to disk.
29
- dst_doc = generate_document(doc, extracted_nodes)
30
- dst_doc.save(@data_dir + "TestFile.Paragraphs Out.doc")
31
- end
32
-
33
- def extract_content_between_block_level_nodes(doc)
34
- # Gather the nodes. The GetChild method uses 0-based index
35
- node_type = Rjb::import("com.aspose.words.NodeType")
36
- start_para = doc.getLastSection().getChild(node_type.PARAGRAPH, 2, true)
37
- end_table = doc.getLastSection().getChild(node_type.TABLE, 0, true)
38
-
39
- # Extract the content between these nodes in the document. Include these markers in the extraction.
40
- extracted_nodes = extract_contents(start_para, end_table, true)
41
-
42
- # Lets reverse the array to make inserting the content back into the document easier.
43
- collections = Rjb::import("java.util.Collections")
44
- collections.reverse(extracted_nodes)
45
-
46
- while extracted_nodes.size() > 0 do
47
- # Insert the last node from the reversed list
48
- end_table.getParentNode().insertAfter(extracted_nodes.get(0), end_table)
49
- # Remove this node from the list after insertion.
50
- extracted_nodes.remove(0)
51
- end
52
-
53
- # Save the generated document to disk.
54
- doc.save(@data_dir + "TestFile.DuplicatedContent Out.doc")
55
- end
56
-
57
- def extract_content_between_paragraph_styles(doc)
58
- # Gather a list of the paragraphs using the respective heading styles.
59
- paras_style_heading1 = paragraphs_by_style_name(doc, "Heading 1")
60
- paras_style_heading3 = paragraphs_by_style_name(doc, "Heading 3")
61
-
62
- # Use the first instance of the paragraphs with those styles.
63
- start_para1 = paras_style_heading1.get(0)
64
- end_para1 = paras_style_heading3.get(0)
65
-
66
- # Extract the content between these nodes in the document. Don't include these markers in the extraction.
67
- extracted_nodes = extract_contents(start_para1, end_para1, false)
68
-
69
- # Insert the content into a new separate document and save it to disk.
70
- dst_doc = generate_document(doc, extracted_nodes)
71
- dst_doc.save(@data_dir + "TestFile.Styles Out.doc")
72
- end
73
-
74
- def extract_content_between_runs(doc)
75
- # Retrieve a paragraph from the first section.
76
- node_type = Rjb::import("com.aspose.words.NodeType")
77
- para = doc.getChild(node_type.PARAGRAPH, 7, true)
78
-
79
- # Use some runs for extraction.
80
- start_run = para.getRuns().get(1)
81
- end_run = para.getRuns().get(4)
82
-
83
- # Extract the content between these nodes in the document. Include these markers in the extraction.
84
- extracted_nodes = extract_contents(start_run, end_run, true)
85
-
86
- # Get the node from the list. There should only be one paragraph returned in the list.
87
- node = extracted_nodes.get(0)
88
-
89
- # Print the text of this node to the console.
90
- save_format = Rjb::import("com.aspose.words.SaveFormat")
91
- puts node.toString(save_format.TEXT)
92
- end
93
-
94
- def extract_content_using_field(doc)
95
- # Use a document builder to retrieve the field start of a merge field.
96
- builder = Rjb::import("com.aspose.words.DocumentBuilder").new(doc)
97
-
98
- # Pass the first boolean parameter to get the DocumentBuilder to move to the FieldStart of the field.
99
- # We could also get FieldStarts of a field using GetChildNode method as in the other examples.
100
- builder.moveToMergeField("Fullname", false, false)
101
-
102
- #/ The builder cursor should be positioned at the start of the field.
103
- node_type = Rjb::import("com.aspose.words.NodeType")
104
- start_field = builder.getCurrentNode()
105
- end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 5, true)
106
-
107
- # Extract the content between these nodes in the document. Don't include these markers in the extraction.
108
- extracted_nodes = extract_contents(start_field, end_para, false)
109
-
110
- # Insert the content into a new separate document and save it to disk.
111
- dst_doc = generate_document(doc, extracted_nodes)
112
- dst_doc.save(@data_dir + "TestFile.Fields Out.doc")
113
- end
114
-
115
- def extract_content_between_bookmark(doc)
116
- # Retrieve the bookmark from the document.
117
- bookmark = doc.getRange().getBookmarks().get("Bookmark1")
118
-
119
- # We use the BookmarkStart and BookmarkEnd nodes as markers.
120
- bookmark_start = bookmark.getBookmarkStart()
121
- bookmark_end = bookmark.getBookmarkEnd()
122
-
123
- # Firstly extract the content between these nodes including the bookmark.
124
- extracted_nodes_inclusive = extract_contents(bookmark_start, bookmark_end, true)
125
- dst_doc = generate_document(doc, extracted_nodes_inclusive)
126
- dst_doc.save(@data_dir + "TestFile.BookmarkInclusive Out.doc")
127
-
128
- # Secondly extract the content between these nodes this time without including the bookmark.
129
- extracted_nodes_exclusive = extract_contents(bookmark_start, bookmark_end, false)
130
- dst_doc = generate_document(doc, extracted_nodes_exclusive)
131
- dst_doc.save(@data_dir + "TestFile.BookmarkExclusive Out.doc")
132
- end
133
-
134
- def extract_content_between_comment_range(doc)
135
- # This is a quick way of getting both comment nodes.
136
- # Your code should have a proper method of retrieving each corresponding start and end node.
137
- node_type = Rjb::import("com.aspose.words.NodeType")
138
- comment_start = doc.getChild(node_type.COMMENT_RANGE_START, 0, true)
139
- comment_end = doc.getChild(node_type.COMMENT_RANGE_END, 0, true)
140
-
141
- # Firstly extract the content between these nodes including the bookmark.
142
- extracted_nodes_inclusive = extract_contents(comment_start, comment_end, true)
143
- dst_doc = generate_document(doc, extracted_nodes_inclusive)
144
- dst_doc.save(@data_dir + "TestFile.CommentInclusive Out.doc")
145
-
146
- # Secondly extract the content between these nodes this time without including the bookmark.
147
- extracted_nodes_exclusive = extract_contents(comment_start, comment_end, false)
148
- dst_doc = generate_document(doc, extracted_nodes_exclusive)
149
- dst_doc.save(@data_dir + "TestFile.CommentExclusive Out.doc")
150
- end
151
-
152
- =begin
153
- This is a method which extracts blocks of content from a document between specified nodes.
154
-
155
- Extracts a range of nodes from a document found between specified markers and returns a copy of those nodes. Content can be extracted
156
- between inline nodes, block level nodes, and also special nodes such as Comment or Boomarks. Any combination of different marker types can used.
157
-
158
- @param string startNode The node which defines where to start the extraction from the document. This node can be block or inline level of a body.
159
- @param string endNode The node which defines where to stop the extraction from the document. This node can be block or inline level of body.
160
- @param boolean isInclusive Should the marker nodes be included.
161
- =end
162
- def extract_contents(startNode, endNode, isInclusive)
163
- # First check that the nodes passed to this method are valid for use.
164
- verify_parameter_nodes(startNode, endNode)
165
-
166
- # Create a list to store the extracted nodes.
167
- nodes = Rjb::import("java.util.ArrayList").new
168
-
169
- # Keep a record of the original nodes passed to this method so we can split marker nodes if needed.
170
- originalStartNode = startNode
171
- originalEndNode = endNode
172
-
173
- # Extract content based on block level nodes (paragraphs and tables). Traverse through parent nodes to find them.
174
- # We will split the content of first and last nodes depending if the marker nodes are inline
175
- node_type = Rjb::import("com.aspose.words.NodeType")
176
-
177
- while (startNode.getParentNode().getNodeType() != node_type.BODY) do
178
- startNode = startNode.getParentNode()
179
- end
180
-
181
- while (endNode.getParentNode().getNodeType() != node_type.BODY) do
182
- endNode = endNode.getParentNode()
183
- end
184
-
185
- isExtracting = true
186
- isStartingNode = true
187
- isEndingNode = ''
188
- #The current node we are extracting from the document.
189
- currNode = startNode
190
-
191
- #Begin extracting content. Process all block level nodes and specifically split the first and last nodes when needed so paragraph formatting is retained.
192
- # Method is little more complex than a regular extractor as we need to factor in extracting using inline nodes, fields, bookmarks etc as to make it really useful.
193
- while (isExtracting) do
194
- # Clone the current node and its children to obtain a copy.
195
- cloneNode = currNode.deepClone(true)
196
- isEndingNode = currNode.equals(endNode)
197
-
198
- if (isStartingNode || isEndingNode) then
199
- # We need to process each marker separately so pass it off to a separate method instead.
200
- if (isStartingNode) then
201
- process_marker(cloneNode, nodes, originalStartNode, isInclusive, isStartingNode, isEndingNode)
202
- isStartingNode = false
203
- end
204
- # Conditional needs to be separate as the block level start and end markers maybe the same node.
205
- if (isEndingNode) then
206
- process_marker(cloneNode, nodes, originalEndNode, isInclusive, isStartingNode, isEndingNode)
207
- isExtracting = false
208
- end
209
- else
210
- # Node is not a start or end marker, simply add the copy to the list.
211
- nodes.add(cloneNode)
212
- end
213
-
214
- # Move to the next node and extract it. If next node is null that means the rest of the content is found in a different section.
215
- #if (currNode.getNextSibling() == null && isExtracting) then
216
- if ((currNode.getNextSibling()).nil? && isExtracting) then
217
- # Move to the next section.
218
- nodeType = Rjb::import("com.aspose.words.NodeType")
219
- nextSection = currNode.getAncestor(nodeType.SECTION).getNextSibling()
220
- currNode = nextSection.getBody().getFirstChild()
221
- else
222
- # Move to the next node in the body.
223
- currNode = currNode.getNextSibling()
224
- end
225
- end
226
- # Return the nodes between the node markers.
227
- nodes
228
- end
229
-
230
- =begin
231
- Checks the input parameters are correct and can be used. Throws an exception if there is any problem.
232
- =end
233
- def verify_parameter_nodes(startNode, endNode)
234
- # The order in which these checks are done is important.
235
- raise 'Start node cannot be null' if startNode.nil?
236
- raise 'End node cannot be null' if endNode.nil?
237
- raise "Start node and end node must belong to the same document" if (startNode.getDocument() == endNode.getDocument())
238
-
239
- nodeType = Rjb::import("com.aspose.words.NodeType")
240
- #raise "Start node and end node must be a child or descendant of a body" if (startNode.getAncestor(nodeType.BODY) == '' || endNode.getAncestor(nodeType.BODY) == '')
241
- raise "Start node and end node must be a child or descendant of a body" if (startNode.getAncestor(nodeType.BODY).nil? || endNode.getAncestor(nodeType.BODY).nil?)
242
-
243
- # Check the end node is after the start node in the DOM tree
244
- # First check if they are in different sections, then if they're not check their position in the body of the same section they are in.
245
- startSection = startNode.getAncestor(nodeType.SECTION)
246
- endSection = endNode.getAncestor(nodeType.SECTION)
247
- startIndex = startSection.getParentNode().indexOf(startSection)
248
- endIndex = endSection.getParentNode().indexOf(endSection)
249
-
250
- if (startIndex == endIndex) then
251
- raise "The end node must be after the start node in the body" if (startSection.getBody().indexOf(startNode) > endSection.getBody().indexOf(endNode))
252
- elsif (startIndex > endIndex) then
253
- raise "The section of end node must be after the section start node"
254
- end
255
- end
256
-
257
- def generate_document(src_doc, nodes)
258
- # Create a blank document.
259
- dst_doc = Rjb::import("com.aspose.words.Document").new
260
-
261
- # Remove the first paragraph from the empty document.
262
- dst_doc.getFirstSection().getBody().removeAllChildren()
263
-
264
- # Import each node from the list into the new document. Keep the original formatting of the node.
265
- import_format_mode = Rjb::import("com.aspose.words.ImportFormatMode")
266
- importer = Rjb::import("com.aspose.words.NodeImporter").new(src_doc, dst_doc, import_format_mode.KEEP_SOURCE_FORMATTING)
267
-
268
- i = 0
269
- while i < nodes.size
270
- node = nodes.get(i)
271
- import_node = importer.importNode(node, true)
272
- dst_doc.getFirstSection().getBody().appendChild(import_node)
273
- i +=1
274
- end
275
-
276
- # Return the generated document.
277
- dst_doc
278
- end
279
-
280
- def process_marker(cloneNode, nodes, node, isInclusive, isStartMarker, isEndMarker)
281
- # If we are dealing with a block level node just see if it should be included and add it to the list.
282
- if (!is_inline(node)) then
283
- # Don't add the node twice if the markers are the same node
284
- if(!(isStartMarker && isEndMarker)) then
285
- if (isInclusive) then
286
- nodes.add(cloneNode)
287
- end
288
- end
289
- return
290
- end
291
-
292
- # If a marker is a FieldStart node check if it's to be included or not.
293
- # We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
294
- nodeType = Rjb::import("com.aspose.words.NodeType")
295
- if (node.getNodeType() == nodeType.FIELD_START) then
296
- # If the marker is a start node and is not be included then skip to the end of the field.
297
- # If the marker is an end node and it is to be included then move to the end field so the field will not be removed.
298
- #if ((isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)) then
299
- if ((isStartMarker && isInclusive.nil?) || (!isStartMarker && isInclusive)) then
300
- #while (node.getNextSibling() != null && node.getNodeType() != nodeType.FIELD_END) do
301
- while (node.getNextSibling().nil? && (node.getNodeType() != nodeType.FIELD_END)) do
302
- node = node.getNextSibling()
303
- end
304
- end
305
- end
306
-
307
- # If either marker is part of a comment then to include the comment itself we need to move the pointer forward to the Comment
308
- # node found after the CommentRangeEnd node.
309
- if (node.getNodeType() == nodeType.COMMENT_RANGE_END) then
310
- while (node.getNextSibling().nil? && (node.getNodeType() != nodeType.COMMENT)) do
311
- node = node.getNextSibling()
312
- end
313
- end
314
-
315
- # Find the corresponding node in our cloned node by index and return it.
316
- # If the start and end node are the same some child nodes might already have been removed. Subtract the
317
- # difference to get the right index.
318
- indexDiff = (node.getParentNode().getChildNodes().getCount() - cloneNode.getChildNodes().getCount())
319
-
320
- # Child node count identical.
321
- if (indexDiff == 0) then
322
- node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node))
323
- else
324
- node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node) - indexDiff)
325
- end
326
-
327
- # Remove the nodes up to/from the marker.
328
- isSkip = ''
329
- isProcessing = true
330
- isRemoving = isStartMarker
331
- nextNode = cloneNode.getFirstChild()
332
- #while (isProcessing && nextNode != null) do
333
- unless (isProcessing && nextNode.nil?)
334
- currentNode = nextNode
335
- isSkip = false
336
- if (currentNode == node) then
337
- if (isStartMarker) then
338
- isProcessing = false
339
- if isInclusive then
340
- isRemoving = false
341
- end
342
- else
343
- isRemoving = true
344
- if isInclusive then
345
- isSkip = true
346
- end
347
- end
348
- end
349
- nextNode = nextNode.getNextSibling()
350
- #if (isRemoving && !isSkip) then
351
- if (isRemoving && isSkip==false) then
352
- currentNode.remove()
353
- end
354
- end
355
-
356
- # After processing the composite node may become empty. If it has don't include it.
357
- if (!(isStartMarker && isEndMarker)) then
358
- if cloneNode.hasChildNodes() then
359
- nodes.add(cloneNode)
360
- end
361
- end
362
- end
363
-
364
- def is_inline(node)
365
- # Test if the node is desendant of a Paragraph or Table node and also is not a paragraph or a table a paragraph inside a comment class which is decesant of a pararaph is possible.
366
- node_type = Rjb::import("com.aspose.words.NodeType")
367
- #return ((node.getAncestor(node_type.PARAGRAPH) != null) || (node.getAncestor(node_type.TABLE) != null) && !(node.getNodeType() == nodeType.PARAGRAPH) || (node.getNodeType() == nodeType.TABLE))
368
- return ((node.getAncestor(node_type.PARAGRAPH).nil?) || (node.getAncestor(node_type.TABLE).nil?) && !(node.getNodeType() == node_type.PARAGRAPH) || (node.getNodeType() == node_type.TABLE))
369
- end
370
-
371
- def paragraphs_by_style_name(doc, style_name)
372
- # Create an array to collect paragraphs of the specified style.
373
- paragraphsWithStyle = Rjb::import("java.util.ArrayList").new
374
-
375
- # Get all paragraphs from the document.
376
- node_type = Rjb::import("com.aspose.words.NodeType")
377
- paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)
378
- paragraphs_count = paragraphs.getCount()
379
- #paragraphs_count = java_values($paragraphs_count)
380
-
381
- # Look through all paragraphs to find those with the specified style.
382
- i = 0
383
- while (i < paragraphs_count) do
384
- paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)
385
- paragraph = paragraphs.get(i)
386
- if (paragraph.getParagraphFormat().getStyle().getName() == style_name) then
387
- paragraphsWithStyle.add(paragraph)
388
- end
389
- i = i + 1
390
- end
391
- paragraphsWithStyle
392
- end
393
-
394
- end
395
- end
1
+ module Asposewordsjavaforruby
2
+ module ExtractContent
3
+ def initialize()
4
+ # The path to the documents directory.
5
+ @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/document/'
6
+
7
+ # Open the document.
8
+ doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestFile.doc")
9
+
10
+ extract_content_between_paragraphs(doc)
11
+ extract_content_between_block_level_nodes(doc)
12
+ extract_content_between_paragraph_styles(doc)
13
+ extract_content_between_runs(doc)
14
+ extract_content_using_field(doc)
15
+ extract_content_between_bookmark(doc)
16
+ extract_content_between_comment_range(doc)
17
+ end
18
+
19
+ def extract_content_between_paragraphs(doc)
20
+ # Gather the nodes. The GetChild method uses 0-based index
21
+ node_type = Rjb::import("com.aspose.words.NodeType")
22
+ start_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 6, true)
23
+ end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 10, true)
24
+
25
+ # Extract the content between these nodes in the document. Include these markers in the extraction.
26
+ extracted_nodes = extract_contents(start_para, end_para, true)
27
+
28
+ # Insert the content into a new separate document and save it to disk.
29
+ dst_doc = generate_document(doc, extracted_nodes)
30
+ dst_doc.save(@data_dir + "TestFile.Paragraphs Out.doc")
31
+ end
32
+
33
+ def extract_content_between_block_level_nodes(doc)
34
+ # Gather the nodes. The GetChild method uses 0-based index
35
+ node_type = Rjb::import("com.aspose.words.NodeType")
36
+ start_para = doc.getLastSection().getChild(node_type.PARAGRAPH, 2, true)
37
+ end_table = doc.getLastSection().getChild(node_type.TABLE, 0, true)
38
+
39
+ # Extract the content between these nodes in the document. Include these markers in the extraction.
40
+ extracted_nodes = extract_contents(start_para, end_table, true)
41
+
42
+ # Lets reverse the array to make inserting the content back into the document easier.
43
+ collections = Rjb::import("java.util.Collections")
44
+ collections.reverse(extracted_nodes)
45
+
46
+ while extracted_nodes.size() > 0 do
47
+ # Insert the last node from the reversed list
48
+ end_table.getParentNode().insertAfter(extracted_nodes.get(0), end_table)
49
+ # Remove this node from the list after insertion.
50
+ extracted_nodes.remove(0)
51
+ end
52
+
53
+ # Save the generated document to disk.
54
+ doc.save(@data_dir + "TestFile.DuplicatedContent Out.doc")
55
+ end
56
+
57
+ def extract_content_between_paragraph_styles(doc)
58
+ # Gather a list of the paragraphs using the respective heading styles.
59
+ paras_style_heading1 = paragraphs_by_style_name(doc, "Heading 1")
60
+ paras_style_heading3 = paragraphs_by_style_name(doc, "Heading 3")
61
+
62
+ # Use the first instance of the paragraphs with those styles.
63
+ start_para1 = paras_style_heading1.get(0)
64
+ end_para1 = paras_style_heading3.get(0)
65
+
66
+ # Extract the content between these nodes in the document. Don't include these markers in the extraction.
67
+ extracted_nodes = extract_contents(start_para1, end_para1, false)
68
+
69
+ # Insert the content into a new separate document and save it to disk.
70
+ dst_doc = generate_document(doc, extracted_nodes)
71
+ dst_doc.save(@data_dir + "TestFile.Styles Out.doc")
72
+ end
73
+
74
+ def extract_content_between_runs(doc)
75
+ # Retrieve a paragraph from the first section.
76
+ node_type = Rjb::import("com.aspose.words.NodeType")
77
+ para = doc.getChild(node_type.PARAGRAPH, 7, true)
78
+
79
+ # Use some runs for extraction.
80
+ start_run = para.getRuns().get(1)
81
+ end_run = para.getRuns().get(4)
82
+
83
+ # Extract the content between these nodes in the document. Include these markers in the extraction.
84
+ extracted_nodes = extract_contents(start_run, end_run, true)
85
+
86
+ # Get the node from the list. There should only be one paragraph returned in the list.
87
+ node = extracted_nodes.get(0)
88
+
89
+ # Print the text of this node to the console.
90
+ save_format = Rjb::import("com.aspose.words.SaveFormat")
91
+ puts node.toString(save_format.TEXT)
92
+ end
93
+
94
+ def extract_content_using_field(doc)
95
+ # Use a document builder to retrieve the field start of a merge field.
96
+ builder = Rjb::import("com.aspose.words.DocumentBuilder").new(doc)
97
+
98
+ # Pass the first boolean parameter to get the DocumentBuilder to move to the FieldStart of the field.
99
+ # We could also get FieldStarts of a field using GetChildNode method as in the other examples.
100
+ builder.moveToMergeField("Fullname", false, false)
101
+
102
+ #/ The builder cursor should be positioned at the start of the field.
103
+ node_type = Rjb::import("com.aspose.words.NodeType")
104
+ start_field = builder.getCurrentNode()
105
+ end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 5, true)
106
+
107
+ # Extract the content between these nodes in the document. Don't include these markers in the extraction.
108
+ extracted_nodes = extract_contents(start_field, end_para, false)
109
+
110
+ # Insert the content into a new separate document and save it to disk.
111
+ dst_doc = generate_document(doc, extracted_nodes)
112
+ dst_doc.save(@data_dir + "TestFile.Fields Out.doc")
113
+ end
114
+
115
+ def extract_content_between_bookmark(doc)
116
+ # Retrieve the bookmark from the document.
117
+ bookmark = doc.getRange().getBookmarks().get("Bookmark1")
118
+
119
+ # We use the BookmarkStart and BookmarkEnd nodes as markers.
120
+ bookmark_start = bookmark.getBookmarkStart()
121
+ bookmark_end = bookmark.getBookmarkEnd()
122
+
123
+ # Firstly extract the content between these nodes including the bookmark.
124
+ extracted_nodes_inclusive = extract_contents(bookmark_start, bookmark_end, true)
125
+ dst_doc = generate_document(doc, extracted_nodes_inclusive)
126
+ dst_doc.save(@data_dir + "TestFile.BookmarkInclusive Out.doc")
127
+
128
+ # Secondly extract the content between these nodes this time without including the bookmark.
129
+ extracted_nodes_exclusive = extract_contents(bookmark_start, bookmark_end, false)
130
+ dst_doc = generate_document(doc, extracted_nodes_exclusive)
131
+ dst_doc.save(@data_dir + "TestFile.BookmarkExclusive Out.doc")
132
+ end
133
+
134
+ def extract_content_between_comment_range(doc)
135
+ # This is a quick way of getting both comment nodes.
136
+ # Your code should have a proper method of retrieving each corresponding start and end node.
137
+ node_type = Rjb::import("com.aspose.words.NodeType")
138
+ comment_start = doc.getChild(node_type.COMMENT_RANGE_START, 0, true)
139
+ comment_end = doc.getChild(node_type.COMMENT_RANGE_END, 0, true)
140
+
141
+ # Firstly extract the content between these nodes including the bookmark.
142
+ extracted_nodes_inclusive = extract_contents(comment_start, comment_end, true)
143
+ dst_doc = generate_document(doc, extracted_nodes_inclusive)
144
+ dst_doc.save(@data_dir + "TestFile.CommentInclusive Out.doc")
145
+
146
+ # Secondly extract the content between these nodes this time without including the bookmark.
147
+ extracted_nodes_exclusive = extract_contents(comment_start, comment_end, false)
148
+ dst_doc = generate_document(doc, extracted_nodes_exclusive)
149
+ dst_doc.save(@data_dir + "TestFile.CommentExclusive Out.doc")
150
+ end
151
+
152
+ =begin
153
+ This is a method which extracts blocks of content from a document between specified nodes.
154
+
155
+ Extracts a range of nodes from a document found between specified markers and returns a copy of those nodes. Content can be extracted
156
+ between inline nodes, block level nodes, and also special nodes such as Comment or Boomarks. Any combination of different marker types can used.
157
+
158
+ @param string startNode The node which defines where to start the extraction from the document. This node can be block or inline level of a body.
159
+ @param string endNode The node which defines where to stop the extraction from the document. This node can be block or inline level of body.
160
+ @param boolean isInclusive Should the marker nodes be included.
161
+ =end
162
+ def extract_contents(startNode, endNode, isInclusive)
163
+ # First check that the nodes passed to this method are valid for use.
164
+ verify_parameter_nodes(startNode, endNode)
165
+
166
+ # Create a list to store the extracted nodes.
167
+ nodes = Rjb::import("java.util.ArrayList").new
168
+
169
+ # Keep a record of the original nodes passed to this method so we can split marker nodes if needed.
170
+ originalStartNode = startNode
171
+ originalEndNode = endNode
172
+
173
+ # Extract content based on block level nodes (paragraphs and tables). Traverse through parent nodes to find them.
174
+ # We will split the content of first and last nodes depending if the marker nodes are inline
175
+ node_type = Rjb::import("com.aspose.words.NodeType")
176
+
177
+ while (startNode.getParentNode().getNodeType() != node_type.BODY) do
178
+ startNode = startNode.getParentNode()
179
+ end
180
+
181
+ while (endNode.getParentNode().getNodeType() != node_type.BODY) do
182
+ endNode = endNode.getParentNode()
183
+ end
184
+
185
+ isExtracting = true
186
+ isStartingNode = true
187
+ isEndingNode = ''
188
+ #The current node we are extracting from the document.
189
+ currNode = startNode
190
+
191
+ #Begin extracting content. Process all block level nodes and specifically split the first and last nodes when needed so paragraph formatting is retained.
192
+ # Method is little more complex than a regular extractor as we need to factor in extracting using inline nodes, fields, bookmarks etc as to make it really useful.
193
+ while (isExtracting) do
194
+ # Clone the current node and its children to obtain a copy.
195
+ cloneNode = currNode.deepClone(true)
196
+ isEndingNode = currNode.equals(endNode)
197
+
198
+ if (isStartingNode || isEndingNode) then
199
+ # We need to process each marker separately so pass it off to a separate method instead.
200
+ if (isStartingNode) then
201
+ process_marker(cloneNode, nodes, originalStartNode, isInclusive, isStartingNode, isEndingNode)
202
+ isStartingNode = false
203
+ end
204
+ # Conditional needs to be separate as the block level start and end markers maybe the same node.
205
+ if (isEndingNode) then
206
+ process_marker(cloneNode, nodes, originalEndNode, isInclusive, isStartingNode, isEndingNode)
207
+ isExtracting = false
208
+ end
209
+ else
210
+ # Node is not a start or end marker, simply add the copy to the list.
211
+ nodes.add(cloneNode)
212
+ end
213
+
214
+ # Move to the next node and extract it. If next node is null that means the rest of the content is found in a different section.
215
+ #if (currNode.getNextSibling() == null && isExtracting) then
216
+ if ((currNode.getNextSibling()).nil? && isExtracting) then
217
+ # Move to the next section.
218
+ nodeType = Rjb::import("com.aspose.words.NodeType")
219
+ nextSection = currNode.getAncestor(nodeType.SECTION).getNextSibling()
220
+ currNode = nextSection.getBody().getFirstChild()
221
+ else
222
+ # Move to the next node in the body.
223
+ currNode = currNode.getNextSibling()
224
+ end
225
+ end
226
+ # Return the nodes between the node markers.
227
+ nodes
228
+ end
229
+
230
+ =begin
231
+ Checks the input parameters are correct and can be used. Throws an exception if there is any problem.
232
+ =end
233
+ def verify_parameter_nodes(startNode, endNode)
234
+ # The order in which these checks are done is important.
235
+ raise 'Start node cannot be null' if startNode.nil?
236
+ raise 'End node cannot be null' if endNode.nil?
237
+ raise "Start node and end node must belong to the same document" if (startNode.getDocument() == endNode.getDocument())
238
+
239
+ nodeType = Rjb::import("com.aspose.words.NodeType")
240
+ #raise "Start node and end node must be a child or descendant of a body" if (startNode.getAncestor(nodeType.BODY) == '' || endNode.getAncestor(nodeType.BODY) == '')
241
+ raise "Start node and end node must be a child or descendant of a body" if (startNode.getAncestor(nodeType.BODY).nil? || endNode.getAncestor(nodeType.BODY).nil?)
242
+
243
+ # Check the end node is after the start node in the DOM tree
244
+ # First check if they are in different sections, then if they're not check their position in the body of the same section they are in.
245
+ startSection = startNode.getAncestor(nodeType.SECTION)
246
+ endSection = endNode.getAncestor(nodeType.SECTION)
247
+ startIndex = startSection.getParentNode().indexOf(startSection)
248
+ endIndex = endSection.getParentNode().indexOf(endSection)
249
+
250
+ if (startIndex == endIndex) then
251
+ raise "The end node must be after the start node in the body" if (startSection.getBody().indexOf(startNode) > endSection.getBody().indexOf(endNode))
252
+ elsif (startIndex > endIndex) then
253
+ raise "The section of end node must be after the section start node"
254
+ end
255
+ end
256
+
257
+ def generate_document(src_doc, nodes)
258
+ # Create a blank document.
259
+ dst_doc = Rjb::import("com.aspose.words.Document").new
260
+
261
+ # Remove the first paragraph from the empty document.
262
+ dst_doc.getFirstSection().getBody().removeAllChildren()
263
+
264
+ # Import each node from the list into the new document. Keep the original formatting of the node.
265
+ import_format_mode = Rjb::import("com.aspose.words.ImportFormatMode")
266
+ importer = Rjb::import("com.aspose.words.NodeImporter").new(src_doc, dst_doc, import_format_mode.KEEP_SOURCE_FORMATTING)
267
+
268
+ i = 0
269
+ while i < nodes.size
270
+ node = nodes.get(i)
271
+ import_node = importer.importNode(node, true)
272
+ dst_doc.getFirstSection().getBody().appendChild(import_node)
273
+ i +=1
274
+ end
275
+
276
+ # Return the generated document.
277
+ dst_doc
278
+ end
279
+
280
+ def process_marker(cloneNode, nodes, node, isInclusive, isStartMarker, isEndMarker)
281
+ # If we are dealing with a block level node just see if it should be included and add it to the list.
282
+ if (!is_inline(node)) then
283
+ # Don't add the node twice if the markers are the same node
284
+ if(!(isStartMarker && isEndMarker)) then
285
+ if (isInclusive) then
286
+ nodes.add(cloneNode)
287
+ end
288
+ end
289
+ return
290
+ end
291
+
292
+ # If a marker is a FieldStart node check if it's to be included or not.
293
+ # We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
294
+ nodeType = Rjb::import("com.aspose.words.NodeType")
295
+ if (node.getNodeType() == nodeType.FIELD_START) then
296
+ # If the marker is a start node and is not be included then skip to the end of the field.
297
+ # If the marker is an end node and it is to be included then move to the end field so the field will not be removed.
298
+ #if ((isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)) then
299
+ if ((isStartMarker && isInclusive.nil?) || (!isStartMarker && isInclusive)) then
300
+ #while (node.getNextSibling() != null && node.getNodeType() != nodeType.FIELD_END) do
301
+ while (node.getNextSibling().nil? && (node.getNodeType() != nodeType.FIELD_END)) do
302
+ node = node.getNextSibling()
303
+ end
304
+ end
305
+ end
306
+
307
+ # If either marker is part of a comment then to include the comment itself we need to move the pointer forward to the Comment
308
+ # node found after the CommentRangeEnd node.
309
+ if (node.getNodeType() == nodeType.COMMENT_RANGE_END) then
310
+ while (node.getNextSibling().nil? && (node.getNodeType() != nodeType.COMMENT)) do
311
+ node = node.getNextSibling()
312
+ end
313
+ end
314
+
315
+ # Find the corresponding node in our cloned node by index and return it.
316
+ # If the start and end node are the same some child nodes might already have been removed. Subtract the
317
+ # difference to get the right index.
318
+ indexDiff = (node.getParentNode().getChildNodes().getCount() - cloneNode.getChildNodes().getCount())
319
+
320
+ # Child node count identical.
321
+ if (indexDiff == 0) then
322
+ node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node))
323
+ else
324
+ node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node) - indexDiff)
325
+ end
326
+
327
+ # Remove the nodes up to/from the marker.
328
+ isSkip = ''
329
+ isProcessing = true
330
+ isRemoving = isStartMarker
331
+ nextNode = cloneNode.getFirstChild()
332
+ #while (isProcessing && nextNode != null) do
333
+ unless (isProcessing && nextNode.nil?)
334
+ currentNode = nextNode
335
+ isSkip = false
336
+ if (currentNode == node) then
337
+ if (isStartMarker) then
338
+ isProcessing = false
339
+ if isInclusive then
340
+ isRemoving = false
341
+ end
342
+ else
343
+ isRemoving = true
344
+ if isInclusive then
345
+ isSkip = true
346
+ end
347
+ end
348
+ end
349
+ nextNode = nextNode.getNextSibling()
350
+ #if (isRemoving && !isSkip) then
351
+ if (isRemoving && isSkip==false) then
352
+ currentNode.remove()
353
+ end
354
+ end
355
+
356
+ # After processing the composite node may become empty. If it has don't include it.
357
+ if (!(isStartMarker && isEndMarker)) then
358
+ if cloneNode.hasChildNodes() then
359
+ nodes.add(cloneNode)
360
+ end
361
+ end
362
+ end
363
+
364
+ def is_inline(node)
365
+ # Test if the node is desendant of a Paragraph or Table node and also is not a paragraph or a table a paragraph inside a comment class which is decesant of a pararaph is possible.
366
+ node_type = Rjb::import("com.aspose.words.NodeType")
367
+ #return ((node.getAncestor(node_type.PARAGRAPH) != null) || (node.getAncestor(node_type.TABLE) != null) && !(node.getNodeType() == nodeType.PARAGRAPH) || (node.getNodeType() == nodeType.TABLE))
368
+ return ((node.getAncestor(node_type.PARAGRAPH).nil?) || (node.getAncestor(node_type.TABLE).nil?) && !(node.getNodeType() == node_type.PARAGRAPH) || (node.getNodeType() == node_type.TABLE))
369
+ end
370
+
371
+ def paragraphs_by_style_name(doc, style_name)
372
+ # Create an array to collect paragraphs of the specified style.
373
+ paragraphsWithStyle = Rjb::import("java.util.ArrayList").new
374
+
375
+ # Get all paragraphs from the document.
376
+ node_type = Rjb::import("com.aspose.words.NodeType")
377
+ paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)
378
+ paragraphs_count = paragraphs.getCount()
379
+ #paragraphs_count = java_values($paragraphs_count)
380
+
381
+ # Look through all paragraphs to find those with the specified style.
382
+ i = 0
383
+ while (i < paragraphs_count) do
384
+ paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)
385
+ paragraph = paragraphs.get(i)
386
+ if (paragraph.getParagraphFormat().getStyle().getName() == style_name) then
387
+ paragraphsWithStyle.add(paragraph)
388
+ end
389
+ i = i + 1
390
+ end
391
+ paragraphsWithStyle
392
+ end
393
+
394
+ end
395
+ end