infinity-parser2 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/PKG-INFO +110 -10
  2. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/README.md +109 -9
  3. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/__init__.py +1 -1
  4. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/cli.py +1 -1
  5. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/parser.py +5 -2
  6. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2.egg-info/PKG-INFO +110 -10
  7. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/setup.py +1 -1
  8. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/__main__.py +0 -0
  9. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/backends/__init__.py +0 -0
  10. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/backends/base.py +0 -0
  11. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/backends/transformers.py +0 -0
  12. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/backends/vllm_engine.py +0 -0
  13. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/backends/vllm_server.py +0 -0
  14. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/prompts.py +0 -0
  15. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/utils/__init__.py +0 -0
  16. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/utils/file.py +0 -0
  17. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/utils/image.py +0 -0
  18. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/utils/model.py +0 -0
  19. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/utils/pdf.py +0 -0
  20. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2/utils/utils.py +0 -0
  21. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2.egg-info/SOURCES.txt +0 -0
  22. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2.egg-info/dependency_links.txt +0 -0
  23. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2.egg-info/entry_points.txt +0 -0
  24. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2.egg-info/requires.txt +0 -0
  25. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/infinity_parser2.egg-info/top_level.txt +0 -0
  26. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/setup.cfg +0 -0
  27. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/tests/__init__.py +0 -0
  28. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/tests/test_backends.py +0 -0
  29. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/tests/test_parser.py +0 -0
  30. {infinity_parser2-0.2.0 → infinity_parser2-0.3.0}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: infinity_parser2
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Document parsing Python package supporting PDF and image parsing using Infinity-Parser2-Pro model.
5
5
  Home-page: https://github.com/infly-ai/INF-MLLM
6
6
  Author: INF Tech
@@ -83,7 +83,101 @@ We are excited to release Infinity-Parser2-Pro, our latest flagship document und
83
83
 
84
84
  ## Quick Start
85
85
 
86
- ### Installation
86
+ ### 1. Minimal "Hello World" (Native Transformers)
87
+
88
+ If you are looking for a minimal script that parses a single image with the native `transformers` library and outputs structured layout JSON (with text fields in Markdown, LaTeX, or HTML), here is a simple snippet:
89
+
90
+ ```python
91
+ from PIL import Image
92
+ import torch
93
+ from transformers import AutoModelForImageTextToText, AutoProcessor
94
+ from qwen_vl_utils import process_vision_info
95
+
96
+ # Load the model and processor
97
+ model = AutoModelForImageTextToText.from_pretrained(
98
+ "infly/Infinity-Parser2-Pro",
99
+ torch_dtype="float16",
100
+ device_map="auto",
101
+ )
102
+ processor = AutoProcessor.from_pretrained("infly/Infinity-Parser2-Pro")
103
+
104
+ # Build the messages for the model
105
+ pil_image = Image.open("demo_data/demo.png").convert("RGB")
106
+ min_pixels = 2048 # 32 * 64
107
+ max_pixels = 16777216 # 4096 * 4096
108
+ prompt = """
109
+ Please output the layout information from the PDF image, including each layout element's bbox, its category, and the corresponding text content within the bbox.
110
+ 1. Bbox format: [x1, y1, x2, y2]
111
+ 2. Layout Categories: The possible categories are ['header', 'title', 'text', 'figure', 'table', 'formula', 'figure_caption', 'table_caption', 'formula_caption', 'figure_footnote', 'table_footnote', 'page_footnote', 'footer'].
112
+ 3. Text Extraction & Formatting Rules:
113
+ - Figure: For the 'figure' category, the text field should be empty string.
114
+ - Formula: Format its text as LaTeX.
115
+ - Table: Format its text as HTML.
116
+ - All Others (Text, Title, etc.): Format their text as Markdown.
117
+ 4. Constraints:
118
+ - The output text must be the original text from the image, with no translation.
119
+ - All layout elements must be sorted according to human reading order.
120
+ 5. Final Output: The entire output must be a single JSON object.
121
+ """
122
+
123
+ messages = [
124
+ {
125
+ "role": "user",
126
+ "content": [
127
+ {
128
+ "type": "image",
129
+ "image": pil_image,
130
+ "min_pixels": min_pixels,
131
+ "max_pixels": max_pixels,
132
+ },
133
+ {"type": "text", "text": prompt},
134
+ ],
135
+ }
136
+ ]
137
+
138
+ chat_template_kwargs = {"enable_thinking": False}
139
+
140
+ text = processor.apply_chat_template(
141
+ messages, tokenize=False, add_generation_prompt=True, **chat_template_kwargs
142
+ )
143
+ image_inputs, _ = process_vision_info(messages, image_patch_size=16)
144
+
145
+ inputs = processor(
146
+ text=text,
147
+ images=image_inputs,
148
+ do_resize=False,
149
+ padding=True,
150
+ return_tensors="pt",
151
+ )
152
+
153
+ # Move all tensors to the same device as the model
154
+ inputs = {
155
+ k: v.to(model.device) if isinstance(v, torch.Tensor) else v
156
+ for k, v in inputs.items()
157
+ }
158
+
159
+ # Generate the response
160
+ generated_ids = model.generate(
161
+ **inputs,
162
+ max_new_tokens=32768,
163
+ temperature=0.0,
164
+ top_p=1.0,
165
+ )
166
+
167
+ # Strip input tokens, keeping only the newly generated response
168
+ generated_ids_trimmed = [
169
+ out_ids[len(in_ids) :]
170
+ for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
171
+ ]
172
+ output_text = processor.batch_decode(
173
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
174
+ )
175
+ print(output_text)
176
+ ```
177
+
178
+ ### 2. Advanced Pipeline (infinity_parser2)
179
+
180
+ For bulk processing, advanced features, or an end-to-end PDF parsing pipeline, we recommend using our `infinity_parser2` wrapper.
87
181
 
88
182
  #### Pre-requisites
89
183
 
@@ -95,10 +189,12 @@ conda activate infinity_parser2
95
189
  # Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
96
190
  pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
97
191
 
98
- # Install FlashAttention (required for NVIDIA GPUs).
99
- # This command builds flash-attn from source, which can take 10 to 30 minutes.
192
+ # Install FlashAttention (FlashAttention-2 is recommended by default)
193
+ # Standard install (compiles from source, ~10-30 min):
100
194
  pip install flash-attn==2.8.3 --no-build-isolation
101
- # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See the official guide at https://github.com/Dao-AILab/flash-attention.
195
+ # Faster install: download wheel from https://github.com/Dao-AILab/flash-attention/releases. Then run: pip install /path/to/<wheel_filename>.whl
196
+ # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See: https://github.com/Dao-AILab/flash-attention
197
+ # NOTE: The code will prioritize detecting FlashAttention-3. If not found, it falls back to FlashAttention-2.
102
198
 
103
199
  # Install vLLM
104
200
  # NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
@@ -108,19 +204,23 @@ pip install vllm==0.17.1
108
204
 
109
205
  #### Install infinity_parser2
110
206
 
207
+ Install from PyPI
208
+
111
209
  ```bash
112
- # From PyPI
113
210
  pip install infinity_parser2
211
+ ```
114
212
 
115
- # From source
213
+ Install from source code
214
+
215
+ ```bash
116
216
  git clone https://github.com/infly-ai/INF-MLLM.git
117
217
  cd INF-MLLM/Infinity-Parser2
118
218
  pip install -e .
119
219
  ```
120
220
 
121
- ### Usage
221
+ #### Usage
122
222
 
123
- #### Command Line
223
+ ##### Command Line
124
224
 
125
225
  The `parser` command is the fastest way to get started.
126
226
 
@@ -151,7 +251,7 @@ parser demo_data/demo.png --task doc2md
151
251
  parser --help
152
252
  ```
153
253
 
154
- #### Python API
254
+ ##### Python API
155
255
 
156
256
  ```python
157
257
  # NOTE: The Infinity-Parser2 model will be automatically downloaded on the first run.
@@ -30,7 +30,101 @@ We are excited to release Infinity-Parser2-Pro, our latest flagship document und
30
30
 
31
31
  ## Quick Start
32
32
 
33
- ### Installation
33
+ ### 1. Minimal "Hello World" (Native Transformers)
34
+
35
+ If you are looking for a minimal script that parses a single image with the native `transformers` library and outputs structured layout JSON (with text fields in Markdown, LaTeX, or HTML), here is a simple snippet:
36
+
37
+ ```python
38
+ from PIL import Image
39
+ import torch
40
+ from transformers import AutoModelForImageTextToText, AutoProcessor
41
+ from qwen_vl_utils import process_vision_info
42
+
43
+ # Load the model and processor
44
+ model = AutoModelForImageTextToText.from_pretrained(
45
+ "infly/Infinity-Parser2-Pro",
46
+ torch_dtype="float16",
47
+ device_map="auto",
48
+ )
49
+ processor = AutoProcessor.from_pretrained("infly/Infinity-Parser2-Pro")
50
+
51
+ # Build the messages for the model
52
+ pil_image = Image.open("demo_data/demo.png").convert("RGB")
53
+ min_pixels = 2048 # 32 * 64
54
+ max_pixels = 16777216 # 4096 * 4096
55
+ prompt = """
56
+ Please output the layout information from the PDF image, including each layout element's bbox, its category, and the corresponding text content within the bbox.
57
+ 1. Bbox format: [x1, y1, x2, y2]
58
+ 2. Layout Categories: The possible categories are ['header', 'title', 'text', 'figure', 'table', 'formula', 'figure_caption', 'table_caption', 'formula_caption', 'figure_footnote', 'table_footnote', 'page_footnote', 'footer'].
59
+ 3. Text Extraction & Formatting Rules:
60
+ - Figure: For the 'figure' category, the text field should be empty string.
61
+ - Formula: Format its text as LaTeX.
62
+ - Table: Format its text as HTML.
63
+ - All Others (Text, Title, etc.): Format their text as Markdown.
64
+ 4. Constraints:
65
+ - The output text must be the original text from the image, with no translation.
66
+ - All layout elements must be sorted according to human reading order.
67
+ 5. Final Output: The entire output must be a single JSON object.
68
+ """
69
+
70
+ messages = [
71
+ {
72
+ "role": "user",
73
+ "content": [
74
+ {
75
+ "type": "image",
76
+ "image": pil_image,
77
+ "min_pixels": min_pixels,
78
+ "max_pixels": max_pixels,
79
+ },
80
+ {"type": "text", "text": prompt},
81
+ ],
82
+ }
83
+ ]
84
+
85
+ chat_template_kwargs = {"enable_thinking": False}
86
+
87
+ text = processor.apply_chat_template(
88
+ messages, tokenize=False, add_generation_prompt=True, **chat_template_kwargs
89
+ )
90
+ image_inputs, _ = process_vision_info(messages, image_patch_size=16)
91
+
92
+ inputs = processor(
93
+ text=text,
94
+ images=image_inputs,
95
+ do_resize=False,
96
+ padding=True,
97
+ return_tensors="pt",
98
+ )
99
+
100
+ # Move all tensors to the same device as the model
101
+ inputs = {
102
+ k: v.to(model.device) if isinstance(v, torch.Tensor) else v
103
+ for k, v in inputs.items()
104
+ }
105
+
106
+ # Generate the response
107
+ generated_ids = model.generate(
108
+ **inputs,
109
+ max_new_tokens=32768,
110
+ temperature=0.0,
111
+ top_p=1.0,
112
+ )
113
+
114
+ # Strip input tokens, keeping only the newly generated response
115
+ generated_ids_trimmed = [
116
+ out_ids[len(in_ids) :]
117
+ for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
118
+ ]
119
+ output_text = processor.batch_decode(
120
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
121
+ )
122
+ print(output_text)
123
+ ```
124
+
125
+ ### 2. Advanced Pipeline (infinity_parser2)
126
+
127
+ For bulk processing, advanced features, or an end-to-end PDF parsing pipeline, we recommend using our `infinity_parser2` wrapper.
34
128
 
35
129
  #### Pre-requisites
36
130
 
@@ -42,10 +136,12 @@ conda activate infinity_parser2
42
136
  # Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
43
137
  pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
44
138
 
45
- # Install FlashAttention (required for NVIDIA GPUs).
46
- # This command builds flash-attn from source, which can take 10 to 30 minutes.
139
+ # Install FlashAttention (FlashAttention-2 is recommended by default)
140
+ # Standard install (compiles from source, ~10-30 min):
47
141
  pip install flash-attn==2.8.3 --no-build-isolation
48
- # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See the official guide at https://github.com/Dao-AILab/flash-attention.
142
+ # Faster install: download wheel from https://github.com/Dao-AILab/flash-attention/releases. Then run: pip install /path/to/<wheel_filename>.whl
143
+ # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See: https://github.com/Dao-AILab/flash-attention
144
+ # NOTE: The code will prioritize detecting FlashAttention-3. If not found, it falls back to FlashAttention-2.
49
145
 
50
146
  # Install vLLM
51
147
  # NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
@@ -55,19 +151,23 @@ pip install vllm==0.17.1
55
151
 
56
152
  #### Install infinity_parser2
57
153
 
154
+ Install from PyPI
155
+
58
156
  ```bash
59
- # From PyPI
60
157
  pip install infinity_parser2
158
+ ```
61
159
 
62
- # From source
160
+ Install from source code
161
+
162
+ ```bash
63
163
  git clone https://github.com/infly-ai/INF-MLLM.git
64
164
  cd INF-MLLM/Infinity-Parser2
65
165
  pip install -e .
66
166
  ```
67
167
 
68
- ### Usage
168
+ #### Usage
69
169
 
70
- #### Command Line
170
+ ##### Command Line
71
171
 
72
172
  The `parser` command is the fastest way to get started.
73
173
 
@@ -98,7 +198,7 @@ parser demo_data/demo.png --task doc2md
98
198
  parser --help
99
199
  ```
100
200
 
101
- #### Python API
201
+ ##### Python API
102
202
 
103
203
  ```python
104
204
  # NOTE: The Infinity-Parser2 model will be automatically downloaded on the first run.
@@ -1,6 +1,6 @@
1
1
  """Infinity-Parser2: Document parsing Python package."""
2
2
 
3
- __version__ = "0.2.0"
3
+ __version__ = "0.3.0"
4
4
 
5
5
  from .parser import InfinityParser2
6
6
  from .backends import (
@@ -136,7 +136,7 @@ Examples:
136
136
  parser.add_argument(
137
137
  "--version",
138
138
  action="version",
139
- version="Infinity-Parser2 0.2.0",
139
+ version="Infinity-Parser2 0.3.0",
140
140
  )
141
141
 
142
142
  return parser
@@ -86,8 +86,11 @@ class InfinityParser2:
86
86
  self.kwargs = kwargs
87
87
 
88
88
  # Initialize model cache and resolve model path (stored separately)
89
- cache = get_model_cache(model_cache_dir)
90
- self._model_path = cache.resolve_model_path(self.model_name)
89
+ if self.backend_name == "vllm-server":
90
+ self._model_path = self.model_name
91
+ else:
92
+ cache = get_model_cache(model_cache_dir)
93
+ self._model_path = cache.resolve_model_path(self.model_name)
91
94
 
92
95
  self._backend: BaseBackend = self._init_backend()
93
96
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: infinity_parser2
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Document parsing Python package supporting PDF and image parsing using Infinity-Parser2-Pro model.
5
5
  Home-page: https://github.com/infly-ai/INF-MLLM
6
6
  Author: INF Tech
@@ -83,7 +83,101 @@ We are excited to release Infinity-Parser2-Pro, our latest flagship document und
83
83
 
84
84
  ## Quick Start
85
85
 
86
- ### Installation
86
+ ### 1. Minimal "Hello World" (Native Transformers)
87
+
88
+ If you are looking for a minimal script that parses a single image with the native `transformers` library and outputs structured layout JSON (with text fields in Markdown, LaTeX, or HTML), here is a simple snippet:
89
+
90
+ ```python
91
+ from PIL import Image
92
+ import torch
93
+ from transformers import AutoModelForImageTextToText, AutoProcessor
94
+ from qwen_vl_utils import process_vision_info
95
+
96
+ # Load the model and processor
97
+ model = AutoModelForImageTextToText.from_pretrained(
98
+ "infly/Infinity-Parser2-Pro",
99
+ torch_dtype="float16",
100
+ device_map="auto",
101
+ )
102
+ processor = AutoProcessor.from_pretrained("infly/Infinity-Parser2-Pro")
103
+
104
+ # Build the messages for the model
105
+ pil_image = Image.open("demo_data/demo.png").convert("RGB")
106
+ min_pixels = 2048 # 32 * 64
107
+ max_pixels = 16777216 # 4096 * 4096
108
+ prompt = """
109
+ Please output the layout information from the PDF image, including each layout element's bbox, its category, and the corresponding text content within the bbox.
110
+ 1. Bbox format: [x1, y1, x2, y2]
111
+ 2. Layout Categories: The possible categories are ['header', 'title', 'text', 'figure', 'table', 'formula', 'figure_caption', 'table_caption', 'formula_caption', 'figure_footnote', 'table_footnote', 'page_footnote', 'footer'].
112
+ 3. Text Extraction & Formatting Rules:
113
+ - Figure: For the 'figure' category, the text field should be empty string.
114
+ - Formula: Format its text as LaTeX.
115
+ - Table: Format its text as HTML.
116
+ - All Others (Text, Title, etc.): Format their text as Markdown.
117
+ 4. Constraints:
118
+ - The output text must be the original text from the image, with no translation.
119
+ - All layout elements must be sorted according to human reading order.
120
+ 5. Final Output: The entire output must be a single JSON object.
121
+ """
122
+
123
+ messages = [
124
+ {
125
+ "role": "user",
126
+ "content": [
127
+ {
128
+ "type": "image",
129
+ "image": pil_image,
130
+ "min_pixels": min_pixels,
131
+ "max_pixels": max_pixels,
132
+ },
133
+ {"type": "text", "text": prompt},
134
+ ],
135
+ }
136
+ ]
137
+
138
+ chat_template_kwargs = {"enable_thinking": False}
139
+
140
+ text = processor.apply_chat_template(
141
+ messages, tokenize=False, add_generation_prompt=True, **chat_template_kwargs
142
+ )
143
+ image_inputs, _ = process_vision_info(messages, image_patch_size=16)
144
+
145
+ inputs = processor(
146
+ text=text,
147
+ images=image_inputs,
148
+ do_resize=False,
149
+ padding=True,
150
+ return_tensors="pt",
151
+ )
152
+
153
+ # Move all tensors to the same device as the model
154
+ inputs = {
155
+ k: v.to(model.device) if isinstance(v, torch.Tensor) else v
156
+ for k, v in inputs.items()
157
+ }
158
+
159
+ # Generate the response
160
+ generated_ids = model.generate(
161
+ **inputs,
162
+ max_new_tokens=32768,
163
+ temperature=0.0,
164
+ top_p=1.0,
165
+ )
166
+
167
+ # Strip input tokens, keeping only the newly generated response
168
+ generated_ids_trimmed = [
169
+ out_ids[len(in_ids) :]
170
+ for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
171
+ ]
172
+ output_text = processor.batch_decode(
173
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
174
+ )
175
+ print(output_text)
176
+ ```
177
+
178
+ ### 2. Advanced Pipeline (infinity_parser2)
179
+
180
+ For bulk processing, advanced features, or an end-to-end PDF parsing pipeline, we recommend using our `infinity_parser2` wrapper.
87
181
 
88
182
  #### Pre-requisites
89
183
 
@@ -95,10 +189,12 @@ conda activate infinity_parser2
95
189
  # Install PyTorch (CUDA). Find the proper version at https://pytorch.org/get-started/previous-versions based on your CUDA version.
96
190
  pip install torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0 --index-url https://download.pytorch.org/whl/cu128
97
191
 
98
- # Install FlashAttention (required for NVIDIA GPUs).
99
- # This command builds flash-attn from source, which can take 10 to 30 minutes.
192
+ # Install FlashAttention (FlashAttention-2 is recommended by default)
193
+ # Standard install (compiles from source, ~10-30 min):
100
194
  pip install flash-attn==2.8.3 --no-build-isolation
101
- # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See the official guide at https://github.com/Dao-AILab/flash-attention.
195
+ # Faster install: download wheel from https://github.com/Dao-AILab/flash-attention/releases. Then run: pip install /path/to/<wheel_filename>.whl
196
+ # For Hopper GPUs (e.g. H100, H800), we recommend FlashAttention-3 instead. See: https://github.com/Dao-AILab/flash-attention
197
+ # NOTE: The code will prioritize detecting FlashAttention-3. If not found, it falls back to FlashAttention-2.
102
198
 
103
199
  # Install vLLM
104
200
  # NOTE: you may need to run the command below to resolve triton and numpy conflicts before installing vllm.
@@ -108,19 +204,23 @@ pip install vllm==0.17.1
108
204
 
109
205
  #### Install infinity_parser2
110
206
 
207
+ Install from PyPI
208
+
111
209
  ```bash
112
- # From PyPI
113
210
  pip install infinity_parser2
211
+ ```
114
212
 
115
- # From source
213
+ Install from source code
214
+
215
+ ```bash
116
216
  git clone https://github.com/infly-ai/INF-MLLM.git
117
217
  cd INF-MLLM/Infinity-Parser2
118
218
  pip install -e .
119
219
  ```
120
220
 
121
- ### Usage
221
+ #### Usage
122
222
 
123
- #### Command Line
223
+ ##### Command Line
124
224
 
125
225
  The `parser` command is the fastest way to get started.
126
226
 
@@ -151,7 +251,7 @@ parser demo_data/demo.png --task doc2md
151
251
  parser --help
152
252
  ```
153
253
 
154
- #### Python API
254
+ ##### Python API
155
255
 
156
256
  ```python
157
257
  # NOTE: The Infinity-Parser2 model will be automatically downloaded on the first run.
@@ -32,7 +32,7 @@ install_requires = [
32
32
 
33
33
  setup(
34
34
  name="infinity_parser2",
35
- version="0.2.0",
35
+ version="0.3.0",
36
36
  description="Document parsing Python package supporting PDF and image parsing using Infinity-Parser2-Pro model.",
37
37
  long_description=open("README.md", "r", encoding="utf-8").read(),
38
38
  long_description_content_type="text/markdown",