asposewordsjavaforruby 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +7 -0
  2. data/Gempackage +2 -2
  3. data/LICENSE +20 -20
  4. data/README.md +28 -2
  5. data/Rakefile +2 -2
  6. data/asposewordsjavaforruby.gemspec +27 -27
  7. data/config/aspose.yml +5 -5
  8. data/data/LoadTxt.txt +14 -14
  9. data/lib/asposewordsjavaforruby.rb +71 -71
  10. data/lib/asposewordsjavaforruby/addwatermark.rb +84 -84
  11. data/lib/asposewordsjavaforruby/appenddoc.rb +24 -24
  12. data/lib/asposewordsjavaforruby/appenddocument.rb +229 -229
  13. data/lib/asposewordsjavaforruby/applylicense.rb +16 -16
  14. data/lib/asposewordsjavaforruby/asposewordsjava.rb +23 -23
  15. data/lib/asposewordsjavaforruby/autofittables.rb +123 -123
  16. data/lib/asposewordsjavaforruby/bookmarks.rb +132 -132
  17. data/lib/asposewordsjavaforruby/checkformat.rb +70 -70
  18. data/lib/asposewordsjavaforruby/compressimages.rb +53 -53
  19. data/lib/asposewordsjavaforruby/doc2pdf.rb +15 -15
  20. data/lib/asposewordsjavaforruby/doctohtml.rb +26 -26
  21. data/lib/asposewordsjavaforruby/extractcontent.rb +395 -395
  22. data/lib/asposewordsjavaforruby/findandreplace.rb +29 -29
  23. data/lib/asposewordsjavaforruby/helloworld.rb +26 -26
  24. data/lib/asposewordsjavaforruby/imagetopdf.rb +71 -71
  25. data/lib/asposewordsjavaforruby/insertnestedfields.rb +39 -39
  26. data/lib/asposewordsjavaforruby/loadandsavetodisk.rb +20 -20
  27. data/lib/asposewordsjavaforruby/loadandsavetostream.rb +32 -32
  28. data/lib/asposewordsjavaforruby/loadtxt.rb +14 -14
  29. data/lib/asposewordsjavaforruby/mergefield.rb +45 -45
  30. data/lib/asposewordsjavaforruby/nodes.rb +29 -29
  31. data/lib/asposewordsjavaforruby/processcomments.rb +72 -72
  32. data/lib/asposewordsjavaforruby/removebreaks.rb +65 -65
  33. data/lib/asposewordsjavaforruby/removefield.rb +23 -23
  34. data/lib/asposewordsjavaforruby/saveasmultipagetiff.rb +26 -26
  35. data/lib/asposewordsjavaforruby/simplemailmerge.rb +23 -23
  36. data/lib/asposewordsjavaforruby/styles.rb +77 -77
  37. data/lib/asposewordsjavaforruby/updatefields.rb +62 -62
  38. data/lib/asposewordsjavaforruby/version.rb +3 -3
  39. metadata +18 -28
@@ -1,70 +1,70 @@
1
- require 'fileutils'
2
- module Asposewordsjavaforruby
3
- module CheckFormat
4
- def initialize()
5
- # The path to the documents directory.
6
- data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
7
-
8
- @supported_dir = data_dir + 'OutSupported/'
9
- file = Rjb::import("java.io.File").new(data_dir + 'joiningandappending/')
10
-
11
- check_fromat(file)
12
- end
13
-
14
- def check_fromat(file)
15
- files_list = file.listFiles()
16
- load_format = Rjb::import('com.aspose.words.LoadFormat')
17
-
18
- files_list.each do |file|
19
- if(file.isDirectory()) then
20
- next
21
- end
22
-
23
- name_only = file.getName()
24
- puts name_only
25
- file_name = file.getPath()
26
- puts file_name
27
-
28
- info_obj = Rjb::import('com.aspose.words.FileFormatUtil')
29
- info = info_obj.detectFileFormat(file_name)
30
- case info.getLoadFormat()
31
- when load_format.DOC
32
- puts "Microsoft Word 97-2003 document."
33
- when load_format.DOT
34
- puts "Microsoft Word 97-2003 template."
35
- when load_format.DOCX
36
- puts "Office Open XML WordprocessingML Macro-Free Document."
37
- when load_format.DOCM
38
- puts "Office Open XML WordprocessingML Macro-Enabled Document."
39
- when load_format.DOTX
40
- puts "Office Open XML WordprocessingML Macro-Free Template."
41
- when load_format.DOTM
42
- puts "Office Open XML WordprocessingML Macro-Enabled Template."
43
- when load_format.FLAT_OPC
44
- puts "Flat OPC document."
45
- when load_format.RTF
46
- puts "RTF format."
47
- when load_format.WORD_ML
48
- puts "Microsoft Word 2003 WordprocessingML format."
49
- when load_format.HTML
50
- puts "HTML format."
51
- when load_format.MHTML
52
- puts "MHTML (Web archive) format."
53
- when load_format.ODT
54
- puts "OpenDocument Text."
55
- when load_format.OTT
56
- puts "OpenDocument Text Template."
57
- when load_format.DOC_PRE_WORD_97
58
- puts "MS Word 6 or Word 95 format."
59
- else load_format.UNKNOWN
60
- puts "Unknown format."
61
- end
62
-
63
- dest_file_obj = Rjb::import("java.io.File").new(@supported_dir + name_only)
64
- dest_File = dest_file_obj.getPath()
65
- FileUtils.cp(file_name, dest_File)
66
- end
67
- end
68
-
69
- end
70
- end
1
+ require 'fileutils'
2
+ module Asposewordsjavaforruby
3
+ module CheckFormat
4
+ def initialize()
5
+ # The path to the documents directory.
6
+ data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
7
+
8
+ @supported_dir = data_dir + 'OutSupported/'
9
+ file = Rjb::import("java.io.File").new(data_dir + 'joiningandappending/')
10
+
11
+ check_fromat(file)
12
+ end
13
+
14
+ def check_fromat(file)
15
+ files_list = file.listFiles()
16
+ load_format = Rjb::import('com.aspose.words.LoadFormat')
17
+
18
+ files_list.each do |file|
19
+ if(file.isDirectory()) then
20
+ next
21
+ end
22
+
23
+ name_only = file.getName()
24
+ puts name_only
25
+ file_name = file.getPath()
26
+ puts file_name
27
+
28
+ info_obj = Rjb::import('com.aspose.words.FileFormatUtil')
29
+ info = info_obj.detectFileFormat(file_name)
30
+ case info.getLoadFormat()
31
+ when load_format.DOC
32
+ puts "Microsoft Word 97-2003 document."
33
+ when load_format.DOT
34
+ puts "Microsoft Word 97-2003 template."
35
+ when load_format.DOCX
36
+ puts "Office Open XML WordprocessingML Macro-Free Document."
37
+ when load_format.DOCM
38
+ puts "Office Open XML WordprocessingML Macro-Enabled Document."
39
+ when load_format.DOTX
40
+ puts "Office Open XML WordprocessingML Macro-Free Template."
41
+ when load_format.DOTM
42
+ puts "Office Open XML WordprocessingML Macro-Enabled Template."
43
+ when load_format.FLAT_OPC
44
+ puts "Flat OPC document."
45
+ when load_format.RTF
46
+ puts "RTF format."
47
+ when load_format.WORD_ML
48
+ puts "Microsoft Word 2003 WordprocessingML format."
49
+ when load_format.HTML
50
+ puts "HTML format."
51
+ when load_format.MHTML
52
+ puts "MHTML (Web archive) format."
53
+ when load_format.ODT
54
+ puts "OpenDocument Text."
55
+ when load_format.OTT
56
+ puts "OpenDocument Text Template."
57
+ when load_format.DOC_PRE_WORD_97
58
+ puts "MS Word 6 or Word 95 format."
59
+ else load_format.UNKNOWN
60
+ puts "Unknown format."
61
+ end
62
+
63
+ dest_file_obj = Rjb::import("java.io.File").new(@supported_dir + name_only)
64
+ dest_File = dest_file_obj.getPath()
65
+ FileUtils.cp(file_name, dest_File)
66
+ end
67
+ end
68
+
69
+ end
70
+ end
@@ -1,53 +1,53 @@
1
- module Asposewordsjavaforruby
2
- module CompressImages
3
- def initialize()
4
- # The path to the documents directory.
5
- @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
6
- srcFileName = @data_dir + "TestCompressImages.docx"
7
-
8
- doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestCompressImages.docx")
9
-
10
- # Demonstrate autofitting a table to the window.
11
- compress_images(doc, srcFileName)
12
- end
13
-
14
- def compress_images(doc, srcFileName)
15
- messageFormat = Rjb::import("java.text.MessageFormat")
16
- file_size = get_file_size(srcFileName)
17
-
18
- # 220ppi Print - said to be excellent on most printers and screens.
19
- # 150ppi Screen - said to be good for web pages and projectors.
20
- # 96ppi Email - said to be good for minimal document size and sharing.
21
- desiredPpi = 150
22
- # In Java this seems to be a good compression / quality setting.
23
- jpegQuality = 90
24
-
25
- # Resample images to desired ppi and save.
26
- resampler = Rjb::import("com.aspose.words.Resampler").new
27
- count = resampler.resample(doc, desiredPpi, jpegQuality)
28
- puts MessageFormat.format("Resampled {0} images.", count)
29
- if (count != 1) then
30
- puts "We expected to have only 1 image resampled in this test document!"
31
- end
32
- dstFileName = @data_dir + "TestCompressImages Out.docx"
33
- doc.save(dstFileName)
34
- puts messageFormat.format("Saving {0}. Size {1}.", dstFileName, get_file_size(dstFileName))
35
-
36
- # Verify that the first image was compressed by checking the new Ppi.
37
- dst_doc = Rjb::import("com.aspose.words.Document").new(dstFileName)
38
- nodeType = Rjb::import("com.aspose.words.NodeType")
39
- shape = dst_doc.getChild(nodeType.DRAWING_ML, 0, true)
40
- convertUtil = Rjb::import("com.aspose.words.ConvertUtil")
41
- imagePpi = shape.getImageData().getImageSize().getWidthPixels() / convertUtil.pointToInch(shape.getSize().getX())
42
- if (imagePpi < 150) then
43
- puts "Image was not resampled successfully."
44
- end
45
- end
46
-
47
- def get_file_size(file_name)
48
- file = Rjb::import("java.io.File").new(file_name)
49
- return file.length()
50
- end
51
-
52
- end
53
- end
1
+ module Asposewordsjavaforruby
2
+ module CompressImages
3
+ def initialize()
4
+ # The path to the documents directory.
5
+ @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
6
+ srcFileName = @data_dir + "TestCompressImages.docx"
7
+
8
+ doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestCompressImages.docx")
9
+
10
+ # Demonstrate autofitting a table to the window.
11
+ compress_images(doc, srcFileName)
12
+ end
13
+
14
+ def compress_images(doc, srcFileName)
15
+ messageFormat = Rjb::import("java.text.MessageFormat")
16
+ file_size = get_file_size(srcFileName)
17
+
18
+ # 220ppi Print - said to be excellent on most printers and screens.
19
+ # 150ppi Screen - said to be good for web pages and projectors.
20
+ # 96ppi Email - said to be good for minimal document size and sharing.
21
+ desiredPpi = 150
22
+ # In Java this seems to be a good compression / quality setting.
23
+ jpegQuality = 90
24
+
25
+ # Resample images to desired ppi and save.
26
+ resampler = Rjb::import("com.aspose.words.Resampler").new
27
+ count = resampler.resample(doc, desiredPpi, jpegQuality)
28
+ puts MessageFormat.format("Resampled {0} images.", count)
29
+ if (count != 1) then
30
+ puts "We expected to have only 1 image resampled in this test document!"
31
+ end
32
+ dstFileName = @data_dir + "TestCompressImages Out.docx"
33
+ doc.save(dstFileName)
34
+ puts messageFormat.format("Saving {0}. Size {1}.", dstFileName, get_file_size(dstFileName))
35
+
36
+ # Verify that the first image was compressed by checking the new Ppi.
37
+ dst_doc = Rjb::import("com.aspose.words.Document").new(dstFileName)
38
+ nodeType = Rjb::import("com.aspose.words.NodeType")
39
+ shape = dst_doc.getChild(nodeType.DRAWING_ML, 0, true)
40
+ convertUtil = Rjb::import("com.aspose.words.ConvertUtil")
41
+ imagePpi = shape.getImageData().getImageSize().getWidthPixels() / convertUtil.pointToInch(shape.getSize().getX())
42
+ if (imagePpi < 150) then
43
+ puts "Image was not resampled successfully."
44
+ end
45
+ end
46
+
47
+ def get_file_size(file_name)
48
+ file = Rjb::import("java.io.File").new(file_name)
49
+ return file.length()
50
+ end
51
+
52
+ end
53
+ end
@@ -1,15 +1,15 @@
1
- module Asposewordsjavaforruby
2
- module Doc2Pdf
3
-
4
- def doc_to_pdf()
5
- data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
6
-
7
- # Open document.
8
- document = Rjb::import('com.aspose.words.Document').new(data_dir + "Template.doc")
9
-
10
- # Save the document in PDF format.
11
- document.save(data_dir + "Doc2PdfSave Out.pdf")
12
- end
13
-
14
- end
15
- end
1
+ module Asposewordsjavaforruby
2
+ module Doc2Pdf
3
+
4
+ def doc_to_pdf()
5
+ data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
6
+
7
+ # Open document.
8
+ document = Rjb::import('com.aspose.words.Document').new(data_dir + "Template.doc")
9
+
10
+ # Save the document in PDF format.
11
+ document.save(data_dir + "Doc2PdfSave Out.pdf")
12
+ end
13
+
14
+ end
15
+ end
@@ -1,26 +1,26 @@
1
- module Asposewordsjavaforruby
2
- module DocToHTML
3
- def initialize()
4
- # The path to the documents directory.
5
- data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
6
-
7
- # Open the document.
8
- doc = Rjb::import('com.aspose.words.Document').new(data_dir + "TestFile.doc")
9
-
10
- #HtmlSaveOptions options = new HtmlSaveOptions();
11
- options = Rjb::import('com.aspose.words.HtmlSaveOptions').new
12
-
13
- # HtmlSaveOptions.ExportRoundtripInformation property specifies
14
- # whether to write the roundtrip information when saving to HTML, MHTML or EPUB.
15
- # Default value is true for HTML and false for MHTML and EPUB.
16
- options.setExportRoundtripInformation(true)
17
- doc.save(data_dir + "ExportRoundtripInformation Out.html", options)
18
-
19
- doc = Rjb::import('com.aspose.words.Document').new(data_dir + "ExportRoundtripInformation Out.html")
20
-
21
- # Save the document Docx file format
22
- save_format = Rjb::import('com.aspose.words.SaveFormat')
23
- doc.save(data_dir + "Out.docx", save_format.DOCX)
24
- end
25
- end
26
- end
1
+ module Asposewordsjavaforruby
2
+ module DocToHTML
3
+ def initialize()
4
+ # The path to the documents directory.
5
+ data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/'
6
+
7
+ # Open the document.
8
+ doc = Rjb::import('com.aspose.words.Document').new(data_dir + "TestFile.doc")
9
+
10
+ #HtmlSaveOptions options = new HtmlSaveOptions();
11
+ options = Rjb::import('com.aspose.words.HtmlSaveOptions').new
12
+
13
+ # HtmlSaveOptions.ExportRoundtripInformation property specifies
14
+ # whether to write the roundtrip information when saving to HTML, MHTML or EPUB.
15
+ # Default value is true for HTML and false for MHTML and EPUB.
16
+ options.setExportRoundtripInformation(true)
17
+ doc.save(data_dir + "ExportRoundtripInformation Out.html", options)
18
+
19
+ doc = Rjb::import('com.aspose.words.Document').new(data_dir + "ExportRoundtripInformation Out.html")
20
+
21
+ # Save the document Docx file format
22
+ save_format = Rjb::import('com.aspose.words.SaveFormat')
23
+ doc.save(data_dir + "Out.docx", save_format.DOCX)
24
+ end
25
+ end
26
+ end
@@ -1,395 +1,395 @@
1
- module Asposewordsjavaforruby
2
- module ExtractContent
3
- def initialize()
4
- # The path to the documents directory.
5
- @data_dir = File.dirname(File.dirname(File.dirname(__FILE__))) + '/data/document/'
6
-
7
- # Open the document.
8
- doc = Rjb::import('com.aspose.words.Document').new(@data_dir + "TestFile.doc")
9
-
10
- extract_content_between_paragraphs(doc)
11
- extract_content_between_block_level_nodes(doc)
12
- extract_content_between_paragraph_styles(doc)
13
- extract_content_between_runs(doc)
14
- extract_content_using_field(doc)
15
- extract_content_between_bookmark(doc)
16
- extract_content_between_comment_range(doc)
17
- end
18
-
19
- def extract_content_between_paragraphs(doc)
20
- # Gather the nodes. The GetChild method uses 0-based index
21
- node_type = Rjb::import("com.aspose.words.NodeType")
22
- start_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 6, true)
23
- end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 10, true)
24
-
25
- # Extract the content between these nodes in the document. Include these markers in the extraction.
26
- extracted_nodes = extract_contents(start_para, end_para, true)
27
-
28
- # Insert the content into a new separate document and save it to disk.
29
- dst_doc = generate_document(doc, extracted_nodes)
30
- dst_doc.save(@data_dir + "TestFile.Paragraphs Out.doc")
31
- end
32
-
33
- def extract_content_between_block_level_nodes(doc)
34
- # Gather the nodes. The GetChild method uses 0-based index
35
- node_type = Rjb::import("com.aspose.words.NodeType")
36
- start_para = doc.getLastSection().getChild(node_type.PARAGRAPH, 2, true)
37
- end_table = doc.getLastSection().getChild(node_type.TABLE, 0, true)
38
-
39
- # Extract the content between these nodes in the document. Include these markers in the extraction.
40
- extracted_nodes = extract_contents(start_para, end_table, true)
41
-
42
- # Lets reverse the array to make inserting the content back into the document easier.
43
- collections = Rjb::import("java.util.Collections")
44
- collections.reverse(extracted_nodes)
45
-
46
- while extracted_nodes.size() > 0 do
47
- # Insert the last node from the reversed list
48
- end_table.getParentNode().insertAfter(extracted_nodes.get(0), end_table)
49
- # Remove this node from the list after insertion.
50
- extracted_nodes.remove(0)
51
- end
52
-
53
- # Save the generated document to disk.
54
- doc.save(@data_dir + "TestFile.DuplicatedContent Out.doc")
55
- end
56
-
57
- def extract_content_between_paragraph_styles(doc)
58
- # Gather a list of the paragraphs using the respective heading styles.
59
- paras_style_heading1 = paragraphs_by_style_name(doc, "Heading 1")
60
- paras_style_heading3 = paragraphs_by_style_name(doc, "Heading 3")
61
-
62
- # Use the first instance of the paragraphs with those styles.
63
- start_para1 = paras_style_heading1.get(0)
64
- end_para1 = paras_style_heading3.get(0)
65
-
66
- # Extract the content between these nodes in the document. Don't include these markers in the extraction.
67
- extracted_nodes = extract_contents(start_para1, end_para1, false)
68
-
69
- # Insert the content into a new separate document and save it to disk.
70
- dst_doc = generate_document(doc, extracted_nodes)
71
- dst_doc.save(@data_dir + "TestFile.Styles Out.doc")
72
- end
73
-
74
- def extract_content_between_runs(doc)
75
- # Retrieve a paragraph from the first section.
76
- node_type = Rjb::import("com.aspose.words.NodeType")
77
- para = doc.getChild(node_type.PARAGRAPH, 7, true)
78
-
79
- # Use some runs for extraction.
80
- start_run = para.getRuns().get(1)
81
- end_run = para.getRuns().get(4)
82
-
83
- # Extract the content between these nodes in the document. Include these markers in the extraction.
84
- extracted_nodes = extract_contents(start_run, end_run, true)
85
-
86
- # Get the node from the list. There should only be one paragraph returned in the list.
87
- node = extracted_nodes.get(0)
88
-
89
- # Print the text of this node to the console.
90
- save_format = Rjb::import("com.aspose.words.SaveFormat")
91
- puts node.toString(save_format.TEXT)
92
- end
93
-
94
- def extract_content_using_field(doc)
95
- # Use a document builder to retrieve the field start of a merge field.
96
- builder = Rjb::import("com.aspose.words.DocumentBuilder").new(doc)
97
-
98
- # Pass the first boolean parameter to get the DocumentBuilder to move to the FieldStart of the field.
99
- # We could also get FieldStarts of a field using GetChildNode method as in the other examples.
100
- builder.moveToMergeField("Fullname", false, false)
101
-
102
- #/ The builder cursor should be positioned at the start of the field.
103
- node_type = Rjb::import("com.aspose.words.NodeType")
104
- start_field = builder.getCurrentNode()
105
- end_para = doc.getFirstSection().getChild(node_type.PARAGRAPH, 5, true)
106
-
107
- # Extract the content between these nodes in the document. Don't include these markers in the extraction.
108
- extracted_nodes = extract_contents(start_field, end_para, false)
109
-
110
- # Insert the content into a new separate document and save it to disk.
111
- dst_doc = generate_document(doc, extracted_nodes)
112
- dst_doc.save(@data_dir + "TestFile.Fields Out.doc")
113
- end
114
-
115
- def extract_content_between_bookmark(doc)
116
- # Retrieve the bookmark from the document.
117
- bookmark = doc.getRange().getBookmarks().get("Bookmark1")
118
-
119
- # We use the BookmarkStart and BookmarkEnd nodes as markers.
120
- bookmark_start = bookmark.getBookmarkStart()
121
- bookmark_end = bookmark.getBookmarkEnd()
122
-
123
- # Firstly extract the content between these nodes including the bookmark.
124
- extracted_nodes_inclusive = extract_contents(bookmark_start, bookmark_end, true)
125
- dst_doc = generate_document(doc, extracted_nodes_inclusive)
126
- dst_doc.save(@data_dir + "TestFile.BookmarkInclusive Out.doc")
127
-
128
- # Secondly extract the content between these nodes this time without including the bookmark.
129
- extracted_nodes_exclusive = extract_contents(bookmark_start, bookmark_end, false)
130
- dst_doc = generate_document(doc, extracted_nodes_exclusive)
131
- dst_doc.save(@data_dir + "TestFile.BookmarkExclusive Out.doc")
132
- end
133
-
134
- def extract_content_between_comment_range(doc)
135
- # This is a quick way of getting both comment nodes.
136
- # Your code should have a proper method of retrieving each corresponding start and end node.
137
- node_type = Rjb::import("com.aspose.words.NodeType")
138
- comment_start = doc.getChild(node_type.COMMENT_RANGE_START, 0, true)
139
- comment_end = doc.getChild(node_type.COMMENT_RANGE_END, 0, true)
140
-
141
- # Firstly extract the content between these nodes including the bookmark.
142
- extracted_nodes_inclusive = extract_contents(comment_start, comment_end, true)
143
- dst_doc = generate_document(doc, extracted_nodes_inclusive)
144
- dst_doc.save(@data_dir + "TestFile.CommentInclusive Out.doc")
145
-
146
- # Secondly extract the content between these nodes this time without including the bookmark.
147
- extracted_nodes_exclusive = extract_contents(comment_start, comment_end, false)
148
- dst_doc = generate_document(doc, extracted_nodes_exclusive)
149
- dst_doc.save(@data_dir + "TestFile.CommentExclusive Out.doc")
150
- end
151
-
152
- =begin
153
- This is a method which extracts blocks of content from a document between specified nodes.
154
-
155
- Extracts a range of nodes from a document found between specified markers and returns a copy of those nodes. Content can be extracted
156
- between inline nodes, block level nodes, and also special nodes such as Comment or Boomarks. Any combination of different marker types can used.
157
-
158
- @param string startNode The node which defines where to start the extraction from the document. This node can be block or inline level of a body.
159
- @param string endNode The node which defines where to stop the extraction from the document. This node can be block or inline level of body.
160
- @param boolean isInclusive Should the marker nodes be included.
161
- =end
162
- def extract_contents(startNode, endNode, isInclusive)
163
- # First check that the nodes passed to this method are valid for use.
164
- verify_parameter_nodes(startNode, endNode)
165
-
166
- # Create a list to store the extracted nodes.
167
- nodes = Rjb::import("java.util.ArrayList").new
168
-
169
- # Keep a record of the original nodes passed to this method so we can split marker nodes if needed.
170
- originalStartNode = startNode
171
- originalEndNode = endNode
172
-
173
- # Extract content based on block level nodes (paragraphs and tables). Traverse through parent nodes to find them.
174
- # We will split the content of first and last nodes depending if the marker nodes are inline
175
- node_type = Rjb::import("com.aspose.words.NodeType")
176
-
177
- while (startNode.getParentNode().getNodeType() != node_type.BODY) do
178
- startNode = startNode.getParentNode()
179
- end
180
-
181
- while (endNode.getParentNode().getNodeType() != node_type.BODY) do
182
- endNode = endNode.getParentNode()
183
- end
184
-
185
- isExtracting = true
186
- isStartingNode = true
187
- isEndingNode = ''
188
- #The current node we are extracting from the document.
189
- currNode = startNode
190
-
191
- #Begin extracting content. Process all block level nodes and specifically split the first and last nodes when needed so paragraph formatting is retained.
192
- # Method is little more complex than a regular extractor as we need to factor in extracting using inline nodes, fields, bookmarks etc as to make it really useful.
193
- while (isExtracting) do
194
- # Clone the current node and its children to obtain a copy.
195
- cloneNode = currNode.deepClone(true)
196
- isEndingNode = currNode.equals(endNode)
197
-
198
- if (isStartingNode || isEndingNode) then
199
- # We need to process each marker separately so pass it off to a separate method instead.
200
- if (isStartingNode) then
201
- process_marker(cloneNode, nodes, originalStartNode, isInclusive, isStartingNode, isEndingNode)
202
- isStartingNode = false
203
- end
204
- # Conditional needs to be separate as the block level start and end markers maybe the same node.
205
- if (isEndingNode) then
206
- process_marker(cloneNode, nodes, originalEndNode, isInclusive, isStartingNode, isEndingNode)
207
- isExtracting = false
208
- end
209
- else
210
- # Node is not a start or end marker, simply add the copy to the list.
211
- nodes.add(cloneNode)
212
- end
213
-
214
- # Move to the next node and extract it. If next node is null that means the rest of the content is found in a different section.
215
- #if (currNode.getNextSibling() == null && isExtracting) then
216
- if ((currNode.getNextSibling()).nil? && isExtracting) then
217
- # Move to the next section.
218
- nodeType = Rjb::import("com.aspose.words.NodeType")
219
- nextSection = currNode.getAncestor(nodeType.SECTION).getNextSibling()
220
- currNode = nextSection.getBody().getFirstChild()
221
- else
222
- # Move to the next node in the body.
223
- currNode = currNode.getNextSibling()
224
- end
225
- end
226
- # Return the nodes between the node markers.
227
- nodes
228
- end
229
-
230
- =begin
231
- Checks the input parameters are correct and can be used. Throws an exception if there is any problem.
232
- =end
233
- def verify_parameter_nodes(startNode, endNode)
234
- # The order in which these checks are done is important.
235
- raise 'Start node cannot be null' if startNode.nil?
236
- raise 'End node cannot be null' if endNode.nil?
237
- raise "Start node and end node must belong to the same document" if (startNode.getDocument() == endNode.getDocument())
238
-
239
- nodeType = Rjb::import("com.aspose.words.NodeType")
240
- #raise "Start node and end node must be a child or descendant of a body" if (startNode.getAncestor(nodeType.BODY) == '' || endNode.getAncestor(nodeType.BODY) == '')
241
- raise "Start node and end node must be a child or descendant of a body" if (startNode.getAncestor(nodeType.BODY).nil? || endNode.getAncestor(nodeType.BODY).nil?)
242
-
243
- # Check the end node is after the start node in the DOM tree
244
- # First check if they are in different sections, then if they're not check their position in the body of the same section they are in.
245
- startSection = startNode.getAncestor(nodeType.SECTION)
246
- endSection = endNode.getAncestor(nodeType.SECTION)
247
- startIndex = startSection.getParentNode().indexOf(startSection)
248
- endIndex = endSection.getParentNode().indexOf(endSection)
249
-
250
- if (startIndex == endIndex) then
251
- raise "The end node must be after the start node in the body" if (startSection.getBody().indexOf(startNode) > endSection.getBody().indexOf(endNode))
252
- elsif (startIndex > endIndex) then
253
- raise "The section of end node must be after the section start node"
254
- end
255
- end
256
-
257
- def generate_document(src_doc, nodes)
258
- # Create a blank document.
259
- dst_doc = Rjb::import("com.aspose.words.Document").new
260
-
261
- # Remove the first paragraph from the empty document.
262
- dst_doc.getFirstSection().getBody().removeAllChildren()
263
-
264
- # Import each node from the list into the new document. Keep the original formatting of the node.
265
- import_format_mode = Rjb::import("com.aspose.words.ImportFormatMode")
266
- importer = Rjb::import("com.aspose.words.NodeImporter").new(src_doc, dst_doc, import_format_mode.KEEP_SOURCE_FORMATTING)
267
-
268
- i = 0
269
- while i < nodes.size
270
- node = nodes.get(i)
271
- import_node = importer.importNode(node, true)
272
- dst_doc.getFirstSection().getBody().appendChild(import_node)
273
- i +=1
274
- end
275
-
276
- # Return the generated document.
277
- dst_doc
278
- end
279
-
280
- def process_marker(cloneNode, nodes, node, isInclusive, isStartMarker, isEndMarker)
281
- # If we are dealing with a block level node just see if it should be included and add it to the list.
282
- if (!is_inline(node)) then
283
- # Don't add the node twice if the markers are the same node
284
- if(!(isStartMarker && isEndMarker)) then
285
- if (isInclusive) then
286
- nodes.add(cloneNode)
287
- end
288
- end
289
- return
290
- end
291
-
292
- # If a marker is a FieldStart node check if it's to be included or not.
293
- # We assume for simplicity that the FieldStart and FieldEnd appear in the same paragraph.
294
- nodeType = Rjb::import("com.aspose.words.NodeType")
295
- if (node.getNodeType() == nodeType.FIELD_START) then
296
- # If the marker is a start node and is not be included then skip to the end of the field.
297
- # If the marker is an end node and it is to be included then move to the end field so the field will not be removed.
298
- #if ((isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)) then
299
- if ((isStartMarker && isInclusive.nil?) || (!isStartMarker && isInclusive)) then
300
- #while (node.getNextSibling() != null && node.getNodeType() != nodeType.FIELD_END) do
301
- while (node.getNextSibling().nil? && (node.getNodeType() != nodeType.FIELD_END)) do
302
- node = node.getNextSibling()
303
- end
304
- end
305
- end
306
-
307
- # If either marker is part of a comment then to include the comment itself we need to move the pointer forward to the Comment
308
- # node found after the CommentRangeEnd node.
309
- if (node.getNodeType() == nodeType.COMMENT_RANGE_END) then
310
- while (node.getNextSibling().nil? && (node.getNodeType() != nodeType.COMMENT)) do
311
- node = node.getNextSibling()
312
- end
313
- end
314
-
315
- # Find the corresponding node in our cloned node by index and return it.
316
- # If the start and end node are the same some child nodes might already have been removed. Subtract the
317
- # difference to get the right index.
318
- indexDiff = (node.getParentNode().getChildNodes().getCount() - cloneNode.getChildNodes().getCount())
319
-
320
- # Child node count identical.
321
- if (indexDiff == 0) then
322
- node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node))
323
- else
324
- node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node) - indexDiff)
325
- end
326
-
327
- # Remove the nodes up to/from the marker.
328
- isSkip = ''
329
- isProcessing = true
330
- isRemoving = isStartMarker
331
- nextNode = cloneNode.getFirstChild()
332
- #while (isProcessing && nextNode != null) do
333
- unless (isProcessing && nextNode.nil?)
334
- currentNode = nextNode
335
- isSkip = false
336
- if (currentNode == node) then
337
- if (isStartMarker) then
338
- isProcessing = false
339
- if isInclusive then
340
- isRemoving = false
341
- end
342
- else
343
- isRemoving = true
344
- if isInclusive then
345
- isSkip = true
346
- end
347
- end
348
- end
349
- nextNode = nextNode.getNextSibling()
350
- #if (isRemoving && !isSkip) then
351
- if (isRemoving && isSkip==false) then
352
- currentNode.remove()
353
- end
354
- end
355
-
356
- # After processing the composite node may become empty. If it has don't include it.
357
- if (!(isStartMarker && isEndMarker)) then
358
- if cloneNode.hasChildNodes() then
359
- nodes.add(cloneNode)
360
- end
361
- end
362
- end
363
-
364
- def is_inline(node)
365
- # Test if the node is desendant of a Paragraph or Table node and also is not a paragraph or a table a paragraph inside a comment class which is decesant of a pararaph is possible.
366
- node_type = Rjb::import("com.aspose.words.NodeType")
367
- #return ((node.getAncestor(node_type.PARAGRAPH) != null) || (node.getAncestor(node_type.TABLE) != null) && !(node.getNodeType() == nodeType.PARAGRAPH) || (node.getNodeType() == nodeType.TABLE))
368
- return ((node.getAncestor(node_type.PARAGRAPH).nil?) || (node.getAncestor(node_type.TABLE).nil?) && !(node.getNodeType() == node_type.PARAGRAPH) || (node.getNodeType() == node_type.TABLE))
369
- end
370
-
371
# Collects every paragraph of +doc+ whose paragraph style is named +style_name+.
#
# @param doc the document to scan
# @param style_name [String] style name to match exactly
# @return a java.util.ArrayList holding the matching paragraphs in document order
def paragraphs_by_style_name(doc, style_name)
  matches = Rjb::import("java.util.ArrayList").new

  # Query the paragraph collection once; it does not change while we scan it.
  node_type = Rjb::import("com.aspose.words.NodeType")
  paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)

  paragraphs.getCount().times do |index|
    paragraph = paragraphs.get(index)
    matches.add(paragraph) if paragraph.getParagraphFormat().getStyle().getName() == style_name
  end

  matches
end
393
-
394
- end
395
- end
1
+ module Asposewordsjavaforruby
2
+ module ExtractContent
3
# Runs every content-extraction demo against the sample document.
def initialize
  # The sample documents live three directory levels above this file, under data/document/.
  base_dir = File.dirname(File.dirname(File.dirname(__FILE__)))
  @data_dir = base_dir + '/data/document/'

  # Load the sample document; the same instance is handed to every demo below.
  document_class = Rjb::import('com.aspose.words.Document')
  doc = document_class.new(@data_dir + "TestFile.doc")

  # Run the demos in their original order.
  demos = [
    :extract_content_between_paragraphs,
    :extract_content_between_block_level_nodes,
    :extract_content_between_paragraph_styles,
    :extract_content_between_runs,
    :extract_content_using_field,
    :extract_content_between_bookmark,
    :extract_content_between_comment_range
  ]
  demos.each { |demo| send(demo, doc) }
end
18
+
19
# Extracts everything from paragraph 6 to paragraph 10 (0-based) of the first
# section, markers included, and saves it as a separate document.
def extract_content_between_paragraphs(doc)
  paragraph_type = Rjb::import("com.aspose.words.NodeType").PARAGRAPH
  first_section = doc.getFirstSection()

  # The two marker paragraphs; getChild takes a 0-based index.
  start_para = first_section.getChild(paragraph_type, 6, true)
  end_para = first_section.getChild(paragraph_type, 10, true)

  # Copy the range, keeping both marker paragraphs.
  extracted_nodes = extract_contents(start_para, end_para, true)

  # Build a new document from the copies and write it next to the sample file.
  generate_document(doc, extracted_nodes).save(@data_dir + "TestFile.Paragraphs Out.doc")
end
32
+
33
# Extracts the content between a paragraph and a table of the last section and
# re-inserts the copies right after that table, then saves the modified document.
def extract_content_between_block_level_nodes(doc)
  node_type = Rjb::import("com.aspose.words.NodeType")
  last_section = doc.getLastSection()

  # Markers: paragraph index 2 and table index 0 (both 0-based) in the last section.
  start_para = last_section.getChild(node_type.PARAGRAPH, 2, true)
  end_table = last_section.getChild(node_type.TABLE, 0, true)

  # Copy the range, keeping both markers.
  extracted_nodes = extract_contents(start_para, end_table, true)

  # Reverse the list: each copy is inserted directly after the table, so
  # inserting in reverse order leaves the copies in their original order.
  Rjb::import("java.util.Collections").reverse(extracted_nodes)

  until extracted_nodes.size().zero?
    # Insert the head of the reversed list, then drop it from the list.
    end_table.getParentNode().insertAfter(extracted_nodes.get(0), end_table)
    extracted_nodes.remove(0)
  end

  # Write the document that now contains the duplicated content.
  doc.save(@data_dir + "TestFile.DuplicatedContent Out.doc")
end
56
+
57
# Extracts the content lying between the first "Heading 1" paragraph and the
# first "Heading 3" paragraph, markers excluded, and saves it as a separate document.
def extract_content_between_paragraph_styles(doc)
  # First paragraph of each heading style serves as a marker.
  start_para1 = paragraphs_by_style_name(doc, "Heading 1").get(0)
  end_para1 = paragraphs_by_style_name(doc, "Heading 3").get(0)

  # Copy the range without the two heading paragraphs themselves.
  extracted_nodes = extract_contents(start_para1, end_para1, false)

  # Build a new document from the copies and write it to disk.
  generate_document(doc, extracted_nodes).save(@data_dir + "TestFile.Styles Out.doc")
end
73
+
74
# Extracts the content between two runs of one paragraph, markers included,
# and prints the extracted text to the console.
def extract_content_between_runs(doc)
  # Paragraph index 7 (0-based) of the document supplies the runs.
  paragraph_type = Rjb::import("com.aspose.words.NodeType").PARAGRAPH
  para = doc.getChild(paragraph_type, 7, true)

  # Marker runs: index 1 and index 4 of that paragraph.
  runs = para.getRuns()
  start_run = runs.get(1)
  end_run = runs.get(4)

  # Copy the range, keeping both marker runs.
  extracted_nodes = extract_contents(start_run, end_run, true)

  # Only one paragraph is expected in the result; print it as plain text.
  text_format = Rjb::import("com.aspose.words.SaveFormat").TEXT
  puts extracted_nodes.get(0).toString(text_format)
end
93
+
94
# Extracts the content between the "Fullname" merge field and paragraph 5
# (0-based) of the first section, markers excluded, and saves it as a separate document.
def extract_content_using_field(doc)
  # Move a DocumentBuilder to the merge field; the two false flags are meant to
  # leave the cursor at the field's start rather than after the field.
  builder = Rjb::import("com.aspose.words.DocumentBuilder").new(doc)
  builder.moveToMergeField("Fullname", false, false)

  # Start marker: the node under the builder cursor. End marker: a paragraph.
  start_field = builder.getCurrentNode()
  paragraph_type = Rjb::import("com.aspose.words.NodeType").PARAGRAPH
  end_para = doc.getFirstSection().getChild(paragraph_type, 5, true)

  # Copy the range without the markers.
  extracted_nodes = extract_contents(start_field, end_para, false)

  # Build a new document from the copies and write it to disk.
  generate_document(doc, extracted_nodes).save(@data_dir + "TestFile.Fields Out.doc")
end
114
+
115
# Extracts the content of bookmark "Bookmark1" twice - once with and once
# without the bookmark markers - and saves each result as its own document.
def extract_content_between_bookmark(doc)
  # The bookmark's start and end nodes act as the extraction markers.
  bookmark = doc.getRange().getBookmarks().get("Bookmark1")
  bookmark_start = bookmark.getBookmarkStart()
  bookmark_end = bookmark.getBookmarkEnd()

  # Pass 1: markers included.
  with_markers = extract_contents(bookmark_start, bookmark_end, true)
  generate_document(doc, with_markers).save(@data_dir + "TestFile.BookmarkInclusive Out.doc")

  # Pass 2: markers excluded.
  without_markers = extract_contents(bookmark_start, bookmark_end, false)
  generate_document(doc, without_markers).save(@data_dir + "TestFile.BookmarkExclusive Out.doc")
end
133
+
134
# Extracts the content of the first comment range twice - once with and once
# without the comment markers - and saves each result as its own document.
def extract_content_between_comment_range(doc)
  # Quick lookup of the first CommentRangeStart / CommentRangeEnd in the document.
  # Real code should pair each start node with its matching end node properly.
  node_type = Rjb::import("com.aspose.words.NodeType")
  comment_start = doc.getChild(node_type.COMMENT_RANGE_START, 0, true)
  comment_end = doc.getChild(node_type.COMMENT_RANGE_END, 0, true)

  # Pass 1: comment markers included.
  with_markers = extract_contents(comment_start, comment_end, true)
  generate_document(doc, with_markers).save(@data_dir + "TestFile.CommentInclusive Out.doc")

  # Pass 2: comment markers excluded.
  without_markers = extract_contents(comment_start, comment_end, false)
  generate_document(doc, without_markers).save(@data_dir + "TestFile.CommentExclusive Out.doc")
end
151
+
152
+ =begin
153
+ This is a method which extracts blocks of content from a document between specified nodes.
154
+
155
+ Extracts a range of nodes from a document found between specified markers and returns a copy of those nodes. Content can be extracted
156
+ between inline nodes, block level nodes, and also special nodes such as Comments or Bookmarks. Any combination of different marker types can be used.
157
+
158
+ @param Node startNode The node which defines where to start the extraction from the document. This node can be block or inline level of a body.
159
+ @param Node endNode The node which defines where to stop the extraction from the document. This node can be block or inline level of a body.
160
+ @param boolean isInclusive Should the marker nodes be included.
161
+ =end
162
# Copies the block-level content found between two marker nodes.
#
# startNode   - node where extraction begins (block or inline level, inside a body)
# endNode     - node where extraction stops (block or inline level, inside a body)
# isInclusive - whether the marker nodes themselves belong to the result
#
# Returns a java.util.ArrayList of cloned nodes; nodes are cloned before any trimming.
def extract_contents(startNode, endNode, isInclusive)
  # Reject unusable marker combinations up front.
  verify_parameter_nodes(startNode, endNode)

  extracted = Rjb::import("java.util.ArrayList").new

  # Remember the markers as given; the first/last block may have to be split at them.
  start_marker = startNode
  end_marker = endNode

  # Climb from each marker to its enclosing block-level node (a direct child of the body).
  node_type = Rjb::import("com.aspose.words.NodeType")
  startNode = startNode.getParentNode() until startNode.getParentNode().getNodeType() == node_type.BODY
  endNode = endNode.getParentNode() until endNode.getParentNode().getNodeType() == node_type.BODY

  at_start = true
  current = startNode

  # Walk the block-level nodes from the start block to the end block. The first
  # and last blocks go through process_marker so inline markers split them.
  loop do
    # Work on a deep copy of the current block.
    copy = current.deepClone(true)
    at_end = current.equals(endNode)

    if at_start || at_end
      if at_start
        process_marker(copy, extracted, start_marker, isInclusive, at_start, at_end)
        at_start = false
      end
      # Checked separately: the start block and the end block may be the same node.
      process_marker(copy, extracted, end_marker, isInclusive, at_start, at_end) if at_end
    else
      # A block strictly between the markers is taken over whole.
      extracted.add(copy)
    end

    # The end block was just handled - nothing further to visit.
    break if at_end

    sibling = current.getNextSibling()
    if sibling.nil?
      # No more blocks in this body: continue with the first block of the next section.
      next_section = current.getAncestor(node_type.SECTION).getNextSibling()
      current = next_section.getBody().getFirstChild()
    else
      current = sibling
    end
  end

  # The copies between (and possibly including) the markers.
  extracted
end
229
+
230
+ =begin
231
+ Checks the input parameters are correct and can be used. Throws an exception if there is any problem.
232
+ =end
233
# Validates the two marker nodes handed to extract_contents.
#
# startNode - node where extraction is supposed to begin
# endNode   - node where extraction is supposed to stop
#
# Raises RuntimeError when a node is nil, when the nodes belong to different
# documents, when either is not inside a body, or when the end node comes
# before the start node. Returns nil when the nodes are usable.
def verify_parameter_nodes(startNode, endNode)
  # The order of these checks matters: later checks call methods on the nodes.
  raise 'Start node cannot be null' if startNode.nil?
  raise 'End node cannot be null' if endNode.nil?

  # Compare with the Java-side equals (as extract_contents does) and fail when
  # the documents DIFFER.
  unless startNode.getDocument().equals(endNode.getDocument())
    raise "Start node and end node must belong to the same document"
  end

  node_type = Rjb::import("com.aspose.words.NodeType")
  if startNode.getAncestor(node_type.BODY).nil? || endNode.getAncestor(node_type.BODY).nil?
    raise "Start node and end node must be a child or descendant of a body"
  end

  # Check the end node comes after the start node: compare the positions of
  # their sections first, and only within one section compare body positions.
  start_section = startNode.getAncestor(node_type.SECTION)
  end_section = endNode.getAncestor(node_type.SECTION)
  start_index = start_section.getParentNode().indexOf(start_section)
  end_index = end_section.getParentNode().indexOf(end_section)

  if start_index == end_index
    if start_section.getBody().indexOf(startNode) > end_section.getBody().indexOf(endNode)
      raise "The end node must be after the start node in the body"
    end
  elsif start_index > end_index
    raise "The section of end node must be after the section start node"
  end
end
256
+
257
# Builds a new document containing imported copies of +nodes+.
#
# src_doc - document the nodes originate from
# nodes   - list of nodes (responds to size/get) to import, in order
#
# Returns the newly created document.
def generate_document(src_doc, nodes)
  # Start from a blank document and empty the body of its first section.
  dst_doc = Rjb::import("com.aspose.words.Document").new
  dst_doc.getFirstSection().getBody().removeAllChildren()

  # The importer carries nodes across documents, keeping the source formatting.
  keep_formatting = Rjb::import("com.aspose.words.ImportFormatMode").KEEP_SOURCE_FORMATTING
  importer = Rjb::import("com.aspose.words.NodeImporter").new(src_doc, dst_doc, keep_formatting)

  # Import each node in list order and append it to the first section's body.
  index = 0
  until index >= nodes.size
    imported = importer.importNode(nodes.get(index), true)
    dst_doc.getFirstSection().getBody().appendChild(imported)
    index += 1
  end

  dst_doc
end
279
+
280
# Trims the cloned block that contains a marker and adds it to the result list.
#
# cloneNode     - deep copy of the block-level node that contains the marker
# nodes         - result list (responds to add) receiving the processed copy
# node          - the original marker node inside the source document
# isInclusive   - whether the marker itself belongs to the extracted content
# isStartMarker - true when this block is the start of the extraction
# isEndMarker   - true when this block is the end of the extraction
#
# For a start marker the children of cloneNode before the marker are removed;
# for an end marker the children after it are removed. When the block is both
# start and end it is not added on the start pass, so it is added only once.
def process_marker(cloneNode, nodes, node, isInclusive, isStartMarker, isEndMarker)
  same_block = isStartMarker && isEndMarker

  # A block-level marker needs no trimming: include the whole copy or nothing.
  unless is_inline(node)
    # Don't add the node twice if the markers are the same node.
    nodes.add(cloneNode) if isInclusive && !same_block
    return
  end

  node_type = Rjb::import("com.aspose.words.NodeType")

  # FieldStart marker: an excluded start marker must skip the whole field, and an
  # included end marker must keep the whole field, so move on to the FieldEnd.
  # For simplicity FieldStart and FieldEnd are assumed to share one paragraph.
  if node.getNodeType() == node_type.FIELD_START
    if (isStartMarker && !isInclusive) || (!isStartMarker && isInclusive)
      while !node.getNextSibling().nil? && node.getNodeType() != node_type.FIELD_END
        node = node.getNextSibling()
      end
    end
  end

  # CommentRangeEnd marker: to take in the comment itself, move forward to the
  # Comment node that follows the CommentRangeEnd.
  if node.getNodeType() == node_type.COMMENT_RANGE_END
    while !node.getNextSibling().nil? && node.getNodeType() != node_type.COMMENT
      node = node.getNextSibling()
    end
  end

  # Locate the marker's counterpart in the copy by child index. When start and
  # end share a block, the start pass has already removed leading children from
  # the copy, so shift the index by the difference in child counts (zero otherwise).
  index_diff = node.getParentNode().getChildNodes().getCount() - cloneNode.getChildNodes().getCount()
  node = cloneNode.getChildNodes().get(node.getParentNode().indexOf(node) - index_diff)

  # Walk the copy's children: a start marker removes everything up to the
  # marker, an end marker removes everything after it.
  processing = true
  removing = isStartMarker
  next_node = cloneNode.getFirstChild()

  while processing && !next_node.nil?
    current = next_node
    skip = false

    if current.equals(node)
      if isStartMarker
        # Reached the start marker: stop here, keeping it only when inclusive.
        processing = false
        removing = false if isInclusive
      else
        # Reached the end marker: remove from here on, sparing it when inclusive.
        removing = true
        skip = true if isInclusive
      end
    end

    # Advance before removing, since a removed node has no sibling to follow.
    next_node = next_node.getNextSibling()
    current.remove() if removing && !skip
  end

  # The copy may have become empty after trimming; if so, leave it out.
  nodes.add(cloneNode) if !same_block && cloneNode.hasChildNodes()
end
363
+
364
# Tells whether +node+ is an inline-level node: it sits somewhere below a
# Paragraph or a Table, and is not itself a Paragraph or a Table.
# (A paragraph nested inside a comment that itself lives in a paragraph has a
# Paragraph ancestor, which is why the node's own type is checked as well.)
#
# @param node the document node to classify
# @return [Boolean] true for inline nodes, false for block-level nodes
def is_inline(node)
  node_type = Rjb::import("com.aspose.words.NodeType")

  # getAncestor yields nil when no ancestor of the requested type exists.
  has_block_ancestor = !node.getAncestor(node_type.PARAGRAPH).nil? ||
                       !node.getAncestor(node_type.TABLE).nil?
  own_type = node.getNodeType()
  is_block = own_type == node_type.PARAGRAPH || own_type == node_type.TABLE

  has_block_ancestor && !is_block
end
370
+
371
# Collects every paragraph of +doc+ whose paragraph style is named +style_name+.
#
# @param doc the document to scan
# @param style_name [String] style name to match exactly
# @return a java.util.ArrayList holding the matching paragraphs in document order
def paragraphs_by_style_name(doc, style_name)
  matches = Rjb::import("java.util.ArrayList").new

  # Query the paragraph collection once; it does not change while we scan it.
  node_type = Rjb::import("com.aspose.words.NodeType")
  paragraphs = doc.getChildNodes(node_type.PARAGRAPH, true)

  paragraphs.getCount().times do |index|
    paragraph = paragraphs.get(index)
    matches.add(paragraph) if paragraph.getParagraphFormat().getStyle().getName() == style_name
  end

  matches
end
393
+
394
+ end
395
+ end